author		Linus Torvalds <torvalds@linux-foundation.org>	2009-06-20 14:29:32 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-06-20 14:29:32 -0400
commit		12e24f34cb0d55efd08c18b2112507d4bf498008 (patch)
tree		83b07be17b8ef45f42360a3b9159b3aaae3fbad4
parent		1eb51c33b21ffa3fceb634d1d6bcd6488c79bc26 (diff)
parent		eadc84cc01e04f9f74ec2de0c9355be035c7b396 (diff)
Merge branch 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perfcounters-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (49 commits)
perfcounter: Handle some IO return values
perf_counter: Push perf_sample_data through the swcounter code
perf_counter tools: Define and use our own u64, s64 etc. definitions
perf_counter: Close race in perf_lock_task_context()
perf_counter, x86: Improve interactions with fast-gup
perf_counter: Simplify and fix task migration counting
perf_counter tools: Add a data file header
perf_counter: Update userspace callchain sampling uses
perf_counter: Make callchain samples extensible
perf report: Filter to parent set by default
perf_counter tools: Handle lost events
perf_counter: Add event overflow handling
fs: Provide empty .set_page_dirty() aop for anon inodes
perf_counter: tools: Makefile tweaks for 64-bit powerpc
perf_counter: powerpc: Add processor back-end for MPC7450 family
perf_counter: powerpc: Make powerpc perf_counter code safe for 32-bit kernels
perf_counter: powerpc: Change how processor-specific back-ends get selected
perf_counter: powerpc: Use unsigned long for register and constraint values
perf_counter: powerpc: Enable use of software counters on 32-bit powerpc
perf_counter tools: Add and use isprint()
...
40 files changed, 2321 insertions, 892 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9fb344d5a86a..bf6cedfa05db 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -126,6 +126,7 @@ config PPC
 	select HAVE_OPROFILE
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
+	select HAVE_PERF_COUNTERS
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index b7f8f4a87cc0..867ab8ed69b3 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -131,6 +131,8 @@ static inline int irqs_disabled_flags(unsigned long flags)
 struct irq_chip;
 
 #ifdef CONFIG_PERF_COUNTERS
+
+#ifdef CONFIG_PPC64
 static inline unsigned long test_perf_counter_pending(void)
 {
 	unsigned long x;
@@ -154,15 +156,15 @@ static inline void clear_perf_counter_pending(void)
 		"r" (0),
 		"i" (offsetof(struct paca_struct, perf_counter_pending)));
 }
+#endif /* CONFIG_PPC64 */
 
-#else
+#else /* CONFIG_PERF_COUNTERS */
 
 static inline unsigned long test_perf_counter_pending(void)
 {
 	return 0;
 }
 
-static inline void set_perf_counter_pending(void) {}
 static inline void clear_perf_counter_pending(void) {}
 #endif /* CONFIG_PERF_COUNTERS */
 
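The asm accessors above compile down to byte loads and stores of paca_struct.perf_counter_pending through r13, as the offsetof() operands show. For illustration only, a plain-C equivalent would look roughly like this (assuming the usual get_paca() accessor; this is not part of the patch):

	/* Sketch: C equivalent of the PPC64 asm accessors above. */
	static inline unsigned long test_perf_counter_pending(void)
	{
		return get_paca()->perf_counter_pending;
	}

	static inline void set_perf_counter_pending(void)
	{
		get_paca()->perf_counter_pending = 1;
	}

	static inline void clear_perf_counter_pending(void)
	{
		get_paca()->perf_counter_pending = 0;
	}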
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h
index cc7c887705b8..8ccd4e155768 100644
--- a/arch/powerpc/include/asm/perf_counter.h
+++ b/arch/powerpc/include/asm/perf_counter.h
@@ -10,6 +10,8 @@
  */
 #include <linux/types.h>
 
+#include <asm/hw_irq.h>
+
 #define MAX_HWCOUNTERS		8
 #define MAX_EVENT_ALTERNATIVES	8
 #define MAX_LIMITED_HWCOUNTERS	2
@@ -19,27 +21,27 @@
  * describe the PMU on a particular POWER-family CPU.
  */
 struct power_pmu {
-	int n_counter;
-	int max_alternatives;
-	u64 add_fields;
-	u64 test_adder;
-	int (*compute_mmcr)(u64 events[], int n_ev,
-		unsigned int hwc[], u64 mmcr[]);
-	int (*get_constraint)(u64 event, u64 *mskp, u64 *valp);
-	int (*get_alternatives)(u64 event, unsigned int flags,
-		u64 alt[]);
-	void (*disable_pmc)(unsigned int pmc, u64 mmcr[]);
-	int (*limited_pmc_event)(u64 event);
-	u32 flags;
-	int n_generic;
-	int *generic_events;
-	int (*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+	const char	*name;
+	int		n_counter;
+	int		max_alternatives;
+	unsigned long	add_fields;
+	unsigned long	test_adder;
+	int		(*compute_mmcr)(u64 events[], int n_ev,
+				unsigned int hwc[], unsigned long mmcr[]);
+	int		(*get_constraint)(u64 event, unsigned long *mskp,
+				unsigned long *valp);
+	int		(*get_alternatives)(u64 event, unsigned int flags,
+				u64 alt[]);
+	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
+	int		(*limited_pmc_event)(u64 event);
+	u32		flags;
+	int		n_generic;
+	int		*generic_events;
+	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
 			[PERF_COUNT_HW_CACHE_OP_MAX]
 			[PERF_COUNT_HW_CACHE_RESULT_MAX];
 };
 
-extern struct power_pmu *ppmu;
-
 /*
  * Values for power_pmu.flags
  */
@@ -53,15 +55,23 @@ extern struct power_pmu *ppmu;
 #define PPMU_LIMITED_PMC_REQD	2	/* have to put this on a limited PMC */
 #define PPMU_ONLY_COUNT_RUN	4	/* only counting in run state */
 
+extern int register_power_pmu(struct power_pmu *);
+
 struct pt_regs;
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs)	perf_misc_flags(regs)
-
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 
 /*
- * The power_pmu.get_constraint function returns a 64-bit value and
- * a 64-bit mask that express the constraints between this event and
+ * Only override the default definitions in include/linux/perf_counter.h
+ * if we have hardware PMU support.
+ */
+#ifdef CONFIG_PPC_PERF_CTRS
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
+#endif
+
+/*
+ * The power_pmu.get_constraint function returns a 32/64-bit value and
+ * a 32/64-bit mask that express the constraints between this event and
  * other events.
  *
  * The value and mask are divided up into (non-overlapping) bitfields
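The add_fields/test_adder pair in struct power_pmu drives the counting side of this scheme: an "add field" of k bits can admit at most N <= 2^(k-1) - 1 events, each event's value contributes the field's least significant bit, and test_adder carries 2^(k-1) - 1 - N in that field so that one event too many propagates a carry into the field's most significant bit, which is the bit the mask watches. A standalone worked example (plain userspace C, not kernel code, using the G4 field of the MPC7450 back-end added below: 4 bits at bit 20, limit N = 4):

	#include <stdio.h>

	#define G4_SHIFT	20
	#define G4_MSB		(1UL << 23)		/* mask: MSB of the 4-bit field */
	#define G4_LSB		(1UL << G4_SHIFT)	/* value: "one more event" */
	#define G4_TEST		(3UL << G4_SHIFT)	/* 2^(k-1) - 1 - N = 8 - 1 - 4 */

	int main(void)
	{
		unsigned long sum = 0;
		int n;

		for (n = 1; n <= 5; n++) {
			sum += G4_LSB;	/* one field only, so a plain add suffices */
			printf("%d event(s): %s\n", n,
			       ((sum + G4_TEST) & G4_MSB) ? "over the limit" : "ok");
		}
		return 0;
	}

Events 1-4 print "ok"; a fifth event makes the field hold 5, 5 + 3 = 8 sets bit 23, and the check fails -- which is how the generic constraint checker rejects an over-committed PMC class.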
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 6a4fb29a0618..b73396b93905 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -97,9 +97,10 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o
 
 obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
-obj-$(CONFIG_PERF_COUNTERS)	+= perf_counter.o power4-pmu.o ppc970-pmu.o \
-				   power5-pmu.o power5+-pmu.o power6-pmu.o \
-				   power7-pmu.o
+obj-$(CONFIG_PPC_PERF_CTRS)	+= perf_counter.o
+obj64-$(CONFIG_PPC_PERF_CTRS)	+= power4-pmu.o ppc970-pmu.o power5-pmu.o \
+				   power5+-pmu.o power6-pmu.o power7-pmu.o
+obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
 obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o
 
@@ -108,6 +109,7 @@ obj-y += iomap.o
 endif
 
 obj-$(CONFIG_PPC64)		+= $(obj64-y)
+obj-$(CONFIG_PPC32)		+= $(obj32-y)
 
 ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
 obj-y				+= ppc_save_regs.o
diff --git a/arch/powerpc/kernel/mpc7450-pmu.c b/arch/powerpc/kernel/mpc7450-pmu.c
new file mode 100644
index 000000000000..75ff47fed7bf
--- /dev/null
+++ b/arch/powerpc/kernel/mpc7450-pmu.c
@@ -0,0 +1,417 @@
+/*
+ * Performance counter support for MPC7450-family processors.
+ *
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/string.h>
+#include <linux/perf_counter.h>
+#include <linux/string.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+#define N_COUNTER	6	/* Number of hardware counters */
+#define MAX_ALT		3	/* Maximum number of event alternative codes */
+
+/*
+ * Bits in event code for MPC7450 family
+ */
+#define PM_THRMULT_MSKS	0x40000
+#define PM_THRESH_SH	12
+#define PM_THRESH_MSK	0x3f
+#define PM_PMC_SH	8
+#define PM_PMC_MSK	7
+#define PM_PMCSEL_MSK	0x7f
+
+/*
+ * Classify events according to how specific their PMC requirements are.
+ * Result is:
+ *	0: can go on any PMC
+ *	1: can go on PMCs 1-4
+ *	2: can go on PMCs 1,2,4
+ *	3: can go on PMCs 1 or 2
+ *	4: can only go on one PMC
+ *	-1: event code is invalid
+ */
+#define N_CLASSES	5
+
+static int mpc7450_classify_event(u32 event)
+{
+	int pmc;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	if (pmc) {
+		if (pmc > N_COUNTER)
+			return -1;
+		return 4;
+	}
+	event &= PM_PMCSEL_MSK;
+	if (event <= 1)
+		return 0;
+	if (event <= 7)
+		return 1;
+	if (event <= 13)
+		return 2;
+	if (event <= 22)
+		return 3;
+	return -1;
+}
+
+/*
+ * Events using threshold and possible threshold scale:
+ *	code	scale?	name
+ *	11e	N	PM_INSTQ_EXCEED_CYC
+ *	11f	N	PM_ALTV_IQ_EXCEED_CYC
+ *	128	Y	PM_DTLB_SEARCH_EXCEED_CYC
+ *	12b	Y	PM_LD_MISS_EXCEED_L1_CYC
+ *	220	N	PM_CQ_EXCEED_CYC
+ *	30c	N	PM_GPR_RB_EXCEED_CYC
+ *	30d	?	PM_FPR_IQ_EXCEED_CYC ?
+ *	311	Y	PM_ITLB_SEARCH_EXCEED
+ *	410	N	PM_GPR_IQ_EXCEED_CYC
+ */
+
+/*
+ * Return use of threshold and threshold scale bits:
+ * 0 = uses neither, 1 = uses threshold, 2 = uses both
+ */
+static int mpc7450_threshold_use(u32 event)
+{
+	int pmc, sel;
+
+	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
+	sel = event & PM_PMCSEL_MSK;
+	switch (pmc) {
+	case 1:
+		if (sel == 0x1e || sel == 0x1f)
+			return 1;
+		if (sel == 0x28 || sel == 0x2b)
+			return 2;
+		break;
+	case 2:
+		if (sel == 0x20)
+			return 1;
+		break;
+	case 3:
+		if (sel == 0xc || sel == 0xd)
+			return 1;
+		if (sel == 0x11)
+			return 2;
+		break;
+	case 4:
+		if (sel == 0x10)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
+/*
+ * Layout of constraint bits:
+ * 33222222222211111111110000000000
+ * 10987654321098765432109876543210
+ *  |<    ><  > < > < ><><><><><><>
+ *  TS TV   G4   G3  G2P6P5P4P3P2P1
+ *
+ * P1 - P6
+ *	0 - 11: Count of events needing PMC1 .. PMC6
+ *
+ * G2
+ *	12 - 14: Count of events needing PMC1 or PMC2
+ *
+ * G3
+ *	16 - 18: Count of events needing PMC1, PMC2 or PMC4
+ *
+ * G4
+ *	20 - 23: Count of events needing PMC1, PMC2, PMC3 or PMC4
+ *
+ * TV
+ *	24 - 29: Threshold value requested
+ *
+ * TS
+ *	30: Threshold scale value requested
+ */
+
+static u32 pmcbits[N_COUNTER][2] = {
+	{ 0x00844002, 0x00111001 },	/* PMC1 mask, value: P1,G2,G3,G4 */
+	{ 0x00844008, 0x00111004 },	/* PMC2: P2,G2,G3,G4 */
+	{ 0x00800020, 0x00100010 },	/* PMC3: P3,G4 */
+	{ 0x00840080, 0x00110040 },	/* PMC4: P4,G3,G4 */
+	{ 0x00000200, 0x00000100 },	/* PMC5: P5 */
+	{ 0x00000800, 0x00000400 }	/* PMC6: P6 */
+};
+
+static u32 classbits[N_CLASSES - 1][2] = {
+	{ 0x00000000, 0x00000000 },	/* class 0: no constraint */
+	{ 0x00800000, 0x00100000 },	/* class 1: G4 */
+	{ 0x00040000, 0x00010000 },	/* class 2: G3 */
+	{ 0x00004000, 0x00001000 },	/* class 3: G2 */
+};
+
+static int mpc7450_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp)
+{
+	int pmc, class;
+	u32 mask, value;
+	int thresh, tuse;
+
+	class = mpc7450_classify_event(event);
+	if (class < 0)
+		return -1;
+	if (class == 4) {
+		pmc = ((unsigned int)event >> PM_PMC_SH) & PM_PMC_MSK;
+		mask = pmcbits[pmc - 1][0];
+		value = pmcbits[pmc - 1][1];
+	} else {
+		mask = classbits[class][0];
+		value = classbits[class][1];
+	}
+
+	tuse = mpc7450_threshold_use(event);
+	if (tuse) {
+		thresh = ((unsigned int)event >> PM_THRESH_SH) & PM_THRESH_MSK;
+		mask |= 0x3f << 24;
+		value |= thresh << 24;
+		if (tuse == 2) {
+			mask |= 0x40000000;
+			if ((unsigned int)event & PM_THRMULT_MSKS)
+				value |= 0x40000000;
+		}
+	}
+
+	*maskp = mask;
+	*valp = value;
+	return 0;
+}
+
+static const unsigned int event_alternatives[][MAX_ALT] = {
+	{ 0x217, 0x317 },		/* PM_L1_DCACHE_MISS */
+	{ 0x418, 0x50f, 0x60f },	/* PM_SNOOP_RETRY */
+	{ 0x502, 0x602 },		/* PM_L2_HIT */
+	{ 0x503, 0x603 },		/* PM_L3_HIT */
+	{ 0x504, 0x604 },		/* PM_L2_ICACHE_MISS */
+	{ 0x505, 0x605 },		/* PM_L3_ICACHE_MISS */
+	{ 0x506, 0x606 },		/* PM_L2_DCACHE_MISS */
+	{ 0x507, 0x607 },		/* PM_L3_DCACHE_MISS */
+	{ 0x50a, 0x623 },		/* PM_LD_HIT_L3 */
+	{ 0x50b, 0x624 },		/* PM_ST_HIT_L3 */
+	{ 0x50d, 0x60d },		/* PM_L2_TOUCH_HIT */
+	{ 0x50e, 0x60e },		/* PM_L3_TOUCH_HIT */
+	{ 0x512, 0x612 },		/* PM_INT_LOCAL */
+	{ 0x513, 0x61d },		/* PM_L2_MISS */
+	{ 0x514, 0x61e },		/* PM_L3_MISS */
+};
+
+/*
+ * Scan the alternatives table for a match and return the
+ * index into the alternatives table if found, else -1.
+ */
+static int find_alternative(u32 event)
+{
+	int i, j;
+
+	for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) {
+		if (event < event_alternatives[i][0])
+			break;
+		for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j)
+			if (event == event_alternatives[i][j])
+				return i;
+	}
+	return -1;
+}
+
+static int mpc7450_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int i, j, nalt = 1;
+	u32 ae;
+
+	alt[0] = event;
+	nalt = 1;
+	i = find_alternative((u32)event);
+	if (i >= 0) {
+		for (j = 0; j < MAX_ALT; ++j) {
+			ae = event_alternatives[i][j];
+			if (ae && ae != (u32)event)
+				alt[nalt++] = ae;
+		}
+	}
+	return nalt;
+}
+
+/*
+ * Bitmaps of which PMCs each class can use for classes 0 - 3.
+ * Bit i is set if PMC i+1 is usable.
+ */
+static const u8 classmap[N_CLASSES] = {
+	0x3f, 0x0f, 0x0b, 0x03, 0
+};
+
+/* Bit position and width of each PMCSEL field */
+static const int pmcsel_shift[N_COUNTER] = {
+	6,	0,	27,	22,	17,	11
+};
+static const u32 pmcsel_mask[N_COUNTER] = {
+	0x7f,	0x3f,	0x1f,	0x1f,	0x1f,	0x3f
+};
+
+/*
+ * Compute MMCR0/1/2 values for a set of events.
+ */
+static int mpc7450_compute_mmcr(u64 event[], int n_ev,
+				unsigned int hwc[], unsigned long mmcr[])
+{
+	u8 event_index[N_CLASSES][N_COUNTER];
+	int n_classevent[N_CLASSES];
+	int i, j, class, tuse;
+	u32 pmc_inuse = 0, pmc_avail;
+	u32 mmcr0 = 0, mmcr1 = 0, mmcr2 = 0;
+	u32 ev, pmc, thresh;
+
+	if (n_ev > N_COUNTER)
+		return -1;
+
+	/* First pass: count usage in each class */
+	for (i = 0; i < N_CLASSES; ++i)
+		n_classevent[i] = 0;
+	for (i = 0; i < n_ev; ++i) {
+		class = mpc7450_classify_event(event[i]);
+		if (class < 0)
+			return -1;
+		j = n_classevent[class]++;
+		event_index[class][j] = i;
+	}
+
+	/* Second pass: allocate PMCs from most specific event to least */
+	for (class = N_CLASSES - 1; class >= 0; --class) {
+		for (i = 0; i < n_classevent[class]; ++i) {
+			ev = event[event_index[class][i]];
+			if (class == 4) {
+				pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
+				if (pmc_inuse & (1 << (pmc - 1)))
+					return -1;
+			} else {
+				/* Find a suitable PMC */
+				pmc_avail = classmap[class] & ~pmc_inuse;
+				if (!pmc_avail)
+					return -1;
+				pmc = ffs(pmc_avail);
+			}
+			pmc_inuse |= 1 << (pmc - 1);
+
+			tuse = mpc7450_threshold_use(ev);
+			if (tuse) {
+				thresh = (ev >> PM_THRESH_SH) & PM_THRESH_MSK;
+				mmcr0 |= thresh << 16;
+				if (tuse == 2 && (ev & PM_THRMULT_MSKS))
+					mmcr2 = 0x80000000;
+			}
+			ev &= pmcsel_mask[pmc - 1];
+			ev <<= pmcsel_shift[pmc - 1];
+			if (pmc <= 2)
+				mmcr0 |= ev;
+			else
+				mmcr1 |= ev;
+			hwc[event_index[class][i]] = pmc - 1;
+		}
+	}
+
+	if (pmc_inuse & 1)
+		mmcr0 |= MMCR0_PMC1CE;
+	if (pmc_inuse & 0x3e)
+		mmcr0 |= MMCR0_PMCnCE;
+
+	/* Return MMCRx values */
+	mmcr[0] = mmcr0;
+	mmcr[1] = mmcr1;
+	mmcr[2] = mmcr2;
+	return 0;
+}
+
+/*
+ * Disable counting by a PMC.
+ * Note that the pmc argument is 0-based here, not 1-based.
+ */
+static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+{
+	if (pmc <= 1)
+		mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+	else
+		mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+}
+
+static int mpc7450_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES]		= 1,
+	[PERF_COUNT_HW_INSTRUCTIONS]		= 2,
+	[PERF_COUNT_HW_CACHE_MISSES]		= 0x217, /* PM_L1_DCACHE_MISS */
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x122, /* PM_BR_CMPL */
+	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x41c, /* PM_BR_MPRED */
+};
+
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x225	},
+		[C(OP_WRITE)] = {	0,		0x227	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(L1I)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x129,		0x115	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	0x634,		0	},
+	},
+	[C(LL)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0	},
+		[C(OP_WRITE)] = {	0,		0	},
+		[C(OP_PREFETCH)] = {	0,		0	},
+	},
+	[C(DTLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x312	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(ITLB)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0,		0x223	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+	[C(BPU)] = {		/*	RESULT_ACCESS	RESULT_MISS */
+		[C(OP_READ)] = {	0x122,		0x41c	},
+		[C(OP_WRITE)] = {	-1,		-1	},
+		[C(OP_PREFETCH)] = {	-1,		-1	},
+	},
+};
+
+struct power_pmu mpc7450_pmu = {
+	.name			= "MPC7450 family",
+	.n_counter		= N_COUNTER,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x00111555ul,
+	.test_adder		= 0x00301000ul,
+	.compute_mmcr		= mpc7450_compute_mmcr,
+	.get_constraint		= mpc7450_get_constraint,
+	.get_alternatives	= mpc7450_get_alternatives,
+	.disable_pmc		= mpc7450_disable_pmc,
+	.n_generic		= ARRAY_SIZE(mpc7450_generic_events),
+	.generic_events		= mpc7450_generic_events,
+	.cache_events		= &mpc7450_cache_events,
+};
+
+static int init_mpc7450_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
+		return -ENODEV;
+
+	return register_power_pmu(&mpc7450_pmu);
+}
+
+arch_initcall(init_mpc7450_pmu);
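Every helper in this back-end decodes the same packed event code: PMC number in bits 8-10, PMCSEL in the low seven bits, threshold value in bits 12-17, and the threshold-multiplier select at bit 18. A quick standalone decode of 0x128 (PM_DTLB_SEARCH_EXCEED_CYC from the threshold table above; plain C for illustration, not kernel code):

	#include <stdio.h>

	#define PM_THRMULT_MSKS	0x40000
	#define PM_THRESH_SH	12
	#define PM_THRESH_MSK	0x3f
	#define PM_PMC_SH	8
	#define PM_PMC_MSK	7
	#define PM_PMCSEL_MSK	0x7f

	int main(void)
	{
		unsigned int ev = 0x128;	/* PM_DTLB_SEARCH_EXCEED_CYC */

		/* pmc is nonzero, so mpc7450_classify_event() returns class 4 */
		printf("PMC     %u\n", (ev >> PM_PMC_SH) & PM_PMC_MSK);	/* 1 */
		/* pmc 1, sel 0x28: mpc7450_threshold_use() returns 2 */
		printf("PMCSEL  0x%x\n", ev & PM_PMCSEL_MSK);		/* 0x28 */
		printf("thresh  %u\n", (ev >> PM_THRESH_SH) & PM_THRESH_MSK);
		printf("scale   %s\n", (ev & PM_THRMULT_MSKS) ? "yes" : "no");
		return 0;
	}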
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb202388170e..809fdf94b95f 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -29,7 +29,7 @@ struct cpu_hw_counters {
 	struct perf_counter *counter[MAX_HWCOUNTERS];
 	u64 events[MAX_HWCOUNTERS];
 	unsigned int flags[MAX_HWCOUNTERS];
-	u64 mmcr[3];
+	unsigned long mmcr[3];
 	struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS];
 	u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 };
@@ -46,6 +46,115 @@ struct power_pmu *ppmu;
  */
 static unsigned int freeze_counters_kernel = MMCR0_FCS;
 
+/*
+ * 32-bit doesn't have MMCRA but does have an MMCR2,
+ * and a few other names are different.
+ */
+#ifdef CONFIG_PPC32
+
+#define MMCR0_FCHV		0
+#define MMCR0_PMCjCE		MMCR0_PMCnCE
+
+#define SPRN_MMCRA		SPRN_MMCR2
+#define MMCRA_SAMPLE_ENABLE	0
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_set_pmu_inuse(int inuse) { }
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	return 0;
+}
+static inline void perf_read_regs(struct pt_regs *regs) { }
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return 0;
+}
+
+#endif /* CONFIG_PPC32 */
+
+/*
+ * Things that are specific to 64-bit implementations.
+ */
+#ifdef CONFIG_PPC64
+
+static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
+		unsigned long slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
+		if (slot > 1)
+			return 4 * (slot - 1);
+	}
+	return 0;
+}
+
+static inline void perf_set_pmu_inuse(int inuse)
+{
+	get_lppaca()->pmcregs_in_use = inuse;
+}
+
+/*
+ * The user wants a data address recorded.
+ * If we're not doing instruction sampling, give them the SDAR
+ * (sampled data address).  If we are doing instruction sampling, then
+ * only give them the SDAR if it corresponds to the instruction
+ * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC
+ * bit in MMCRA.
+ */
+static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+{
+	unsigned long mmcra = regs->dsisr;
+	unsigned long sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
+		POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
+
+	if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
+		*addrp = mfspr(SPRN_SDAR);
+}
+
+static inline u32 perf_get_misc_flags(struct pt_regs *regs)
+{
+	unsigned long mmcra = regs->dsisr;
+
+	if (TRAP(regs) != 0xf00)
+		return 0;	/* not a PMU interrupt */
+
+	if (ppmu->flags & PPMU_ALT_SIPR) {
+		if (mmcra & POWER6_MMCRA_SIHV)
+			return PERF_EVENT_MISC_HYPERVISOR;
+		return (mmcra & POWER6_MMCRA_SIPR) ?
+			PERF_EVENT_MISC_USER : PERF_EVENT_MISC_KERNEL;
+	}
+	if (mmcra & MMCRA_SIHV)
+		return PERF_EVENT_MISC_HYPERVISOR;
+	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
+		PERF_EVENT_MISC_KERNEL;
+}
+
+/*
+ * Overload regs->dsisr to store MMCRA so we only need to read it once
+ * on each interrupt.
+ */
+static inline void perf_read_regs(struct pt_regs *regs)
+{
+	regs->dsisr = mfspr(SPRN_MMCRA);
+}
+
+/*
+ * If interrupts were soft-disabled when a PMU interrupt occurs, treat
+ * it as an NMI.
+ */
+static inline int perf_intr_is_nmi(struct pt_regs *regs)
+{
+	return !regs->softe;
+}
+
+#endif /* CONFIG_PPC64 */
+
 static void perf_counter_interrupt(struct pt_regs *regs);
 
 void perf_counter_print_debug(void)
@@ -78,12 +187,14 @@ static unsigned long read_pmc(int idx)
 	case 6:
 		val = mfspr(SPRN_PMC6);
 		break;
+#ifdef CONFIG_PPC64
 	case 7:
 		val = mfspr(SPRN_PMC7);
 		break;
 	case 8:
 		val = mfspr(SPRN_PMC8);
 		break;
+#endif /* CONFIG_PPC64 */
 	default:
 		printk(KERN_ERR "oops trying to read PMC%d\n", idx);
 		val = 0;
@@ -115,12 +226,14 @@ static void write_pmc(int idx, unsigned long val)
 	case 6:
 		mtspr(SPRN_PMC6, val);
 		break;
+#ifdef CONFIG_PPC64
 	case 7:
 		mtspr(SPRN_PMC7, val);
 		break;
 	case 8:
 		mtspr(SPRN_PMC8, val);
 		break;
+#endif /* CONFIG_PPC64 */
 	default:
 		printk(KERN_ERR "oops trying to write PMC%d\n", idx);
 	}
@@ -135,15 +248,15 @@ static void write_pmc(int idx, unsigned long val)
 static int power_check_constraints(u64 event[], unsigned int cflags[],
 				   int n_ev)
 {
-	u64 mask, value, nv;
+	unsigned long mask, value, nv;
 	u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
-	u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
+	unsigned long amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	unsigned long avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES];
+	unsigned long smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS];
 	int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS];
 	int i, j;
-	u64 addf = ppmu->add_fields;
-	u64 tadd = ppmu->test_adder;
+	unsigned long addf = ppmu->add_fields;
+	unsigned long tadd = ppmu->test_adder;
 
 	if (n_ev > ppmu->n_counter)
 		return -1;
@@ -283,7 +396,7 @@ static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[],
 
 static void power_pmu_read(struct perf_counter *counter)
 {
-	long val, delta, prev;
+	s64 val, delta, prev;
 
 	if (!counter->hw.idx)
 		return;
@@ -403,14 +516,12 @@ static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
 void hw_perf_disable(void)
 {
 	struct cpu_hw_counters *cpuhw;
-	unsigned long ret;
 	unsigned long flags;
 
 	local_irq_save(flags);
 	cpuhw = &__get_cpu_var(cpu_hw_counters);
 
-	ret = cpuhw->disabled;
-	if (!ret) {
+	if (!cpuhw->disabled) {
 		cpuhw->disabled = 1;
 		cpuhw->n_added = 0;
 
@@ -479,7 +590,7 @@ void hw_perf_enable(void)
 		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 		if (cpuhw->n_counters == 0)
-			get_lppaca()->pmcregs_in_use = 0;
+			perf_set_pmu_inuse(0);
 		goto out_enable;
 	}
 
@@ -512,7 +623,7 @@ void hw_perf_enable(void)
 	 * bit set and set the hardware counters to their initial values.
 	 * Then unfreeze the counters.
 	 */
-	get_lppaca()->pmcregs_in_use = 1;
+	perf_set_pmu_inuse(1);
 	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
 	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
 	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
@@ -913,6 +1024,8 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 	case PERF_TYPE_RAW:
 		ev = counter->attr.config;
 		break;
+	default:
+		return ERR_PTR(-EINVAL);
 	}
 	counter->hw.config_base = ev;
 	counter->hw.idx = 0;
@@ -1007,13 +1120,12 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
  * things if requested.  Note that interrupts are hard-disabled
  * here so there is no possibility of being interrupted.
  */
-static void record_and_restart(struct perf_counter *counter, long val,
+static void record_and_restart(struct perf_counter *counter, unsigned long val,
 			       struct pt_regs *regs, int nmi)
 {
 	u64 period = counter->hw.sample_period;
 	s64 prev, delta, left;
 	int record = 0;
-	u64 addr, mmcra, sdsync;
 
 	/* we don't have to worry about interrupts here */
 	prev = atomic64_read(&counter->hw.prev_count);
@@ -1033,8 +1145,8 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			left = period;
 			record = 1;
 		}
-		if (left < 0x80000000L)
-			val = 0x80000000L - left;
+		if (left < 0x80000000LL)
+			val = 0x80000000LL - left;
 	}
 
 	/*
@@ -1047,22 +1159,9 @@ static void record_and_restart(struct perf_counter *counter, long val,
 			.period = counter->hw.last_period,
 		};
 
-		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
-			/*
-			 * The user wants a data address recorded.
-			 * If we're not doing instruction sampling,
-			 * give them the SDAR (sampled data address).
-			 * If we are doing instruction sampling, then only
-			 * give them the SDAR if it corresponds to the
-			 * instruction pointed to by SIAR; this is indicated
-			 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
-			 */
-			mmcra = regs->dsisr;
-			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
-				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
-			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
-				data.addr = mfspr(SPRN_SDAR);
-		}
+		if (counter->attr.sample_type & PERF_SAMPLE_ADDR)
+			perf_get_data_addr(regs, &data.addr);
+
 		if (perf_counter_overflow(counter, nmi, &data)) {
 			/*
 			 * Interrupts are coming too fast - throttle them
@@ -1088,25 +1187,12 @@ static void record_and_restart(struct perf_counter *counter, long val,
  */
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
-	unsigned long mmcra;
-
-	if (TRAP(regs) != 0xf00) {
-		/* not a PMU interrupt */
-		return user_mode(regs) ? PERF_EVENT_MISC_USER :
-			PERF_EVENT_MISC_KERNEL;
-	}
+	u32 flags = perf_get_misc_flags(regs);
 
-	mmcra = regs->dsisr;
-	if (ppmu->flags & PPMU_ALT_SIPR) {
-		if (mmcra & POWER6_MMCRA_SIHV)
-			return PERF_EVENT_MISC_HYPERVISOR;
-		return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
-			PERF_EVENT_MISC_KERNEL;
-	}
-	if (mmcra & MMCRA_SIHV)
-		return PERF_EVENT_MISC_HYPERVISOR;
-	return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER :
-		PERF_EVENT_MISC_KERNEL;
+	if (flags)
+		return flags;
+	return user_mode(regs) ? PERF_EVENT_MISC_USER :
+		PERF_EVENT_MISC_KERNEL;
 }
 
 /*
@@ -1115,20 +1201,12 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
  */
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-	unsigned long mmcra;
 	unsigned long ip;
-	unsigned long slot;
 
 	if (TRAP(regs) != 0xf00)
 		return regs->nip;	/* not a PMU interrupt */
 
-	ip = mfspr(SPRN_SIAR);
-	mmcra = regs->dsisr;
-	if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) {
-		slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT;
-		if (slot > 1)
-			ip += 4 * (slot - 1);
-	}
+	ip = mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
 	return ip;
 }
 
@@ -1140,7 +1218,7 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 	int i;
 	struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters);
 	struct perf_counter *counter;
-	long val;
+	unsigned long val;
 	int found = 0;
 	int nmi;
 
@@ -1148,16 +1226,9 @@ static void perf_counter_interrupt(struct pt_regs *regs)
 		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
 					mfspr(SPRN_PMC6));
 
-	/*
-	 * Overload regs->dsisr to store MMCRA so we only need to read it once.
-	 */
-	regs->dsisr = mfspr(SPRN_MMCRA);
+	perf_read_regs(regs);
 
-	/*
-	 * If interrupts were soft-disabled when this PMU interrupt
-	 * occurred, treat it as an NMI.
-	 */
-	nmi = !regs->softe;
+	nmi = perf_intr_is_nmi(regs);
 	if (nmi)
 		nmi_enter();
 	else
@@ -1214,50 +1285,22 @@ void hw_perf_counter_setup(int cpu)
 	cpuhw->mmcr[0] = MMCR0_FC;
 }
 
-extern struct power_pmu power4_pmu;
-extern struct power_pmu ppc970_pmu;
-extern struct power_pmu power5_pmu;
-extern struct power_pmu power5p_pmu;
-extern struct power_pmu power6_pmu;
-extern struct power_pmu power7_pmu;
-
-static int init_perf_counters(void)
+int register_power_pmu(struct power_pmu *pmu)
 {
-	unsigned long pvr;
-
-	/* XXX should get this from cputable */
-	pvr = mfspr(SPRN_PVR);
-	switch (PVR_VER(pvr)) {
-	case PV_POWER4:
-	case PV_POWER4p:
-		ppmu = &power4_pmu;
-		break;
-	case PV_970:
-	case PV_970FX:
-	case PV_970MP:
-		ppmu = &ppc970_pmu;
-		break;
-	case PV_POWER5:
-		ppmu = &power5_pmu;
-		break;
-	case PV_POWER5p:
-		ppmu = &power5p_pmu;
-		break;
-	case 0x3e:
-		ppmu = &power6_pmu;
-		break;
-	case 0x3f:
-		ppmu = &power7_pmu;
-		break;
-	}
+	if (ppmu)
+		return -EBUSY;		/* something's already registered */
+
+	ppmu = pmu;
+	pr_info("%s performance monitor hardware support registered\n",
+		pmu->name);
 
+#ifdef MSR_HV
 	/*
 	 * Use FCHV to ignore kernel events if MSR.HV is set.
 	 */
 	if (mfmsr() & MSR_HV)
 		freeze_counters_kernel = MMCR0_FCHV;
+#endif /* CONFIG_PPC64 */
 
 	return 0;
 }
-
-arch_initcall(init_perf_counters);
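register_power_pmu() replaces the old central PVR switch: each back-end now carries its own arch_initcall() that checks the running CPU and registers its struct power_pmu, with the first successful caller winning and any later one getting -EBUSY. Adding a back-end therefore reduces to this boilerplate (a sketch for a hypothetical "foo" CPU; the remaining struct fields have to be filled in as in the real back-ends below):

	static struct power_pmu foo_pmu = {
		.name		= "FOO",
		.n_counter	= 4,
		/* .compute_mmcr, .get_constraint, .get_alternatives,
		 * .disable_pmc, .n_generic, .generic_events and
		 * .cache_events filled in as in the real back-ends. */
	};

	static int init_foo_pmu(void)
	{
		/* Bail out unless this kernel is running on a "foo". */
		if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/foo"))
			return -ENODEV;

		return register_power_pmu(&foo_pmu);
	}

	arch_initcall(init_foo_pmu);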
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c
index 07bd308a5fa7..db90b0c5c27b 100644
--- a/arch/powerpc/kernel/power4-pmu.c
+++ b/arch/powerpc/kernel/power4-pmu.c
@@ -10,7 +10,9 @@
  */
 #include <linux/kernel.h>
 #include <linux/perf_counter.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER4
@@ -179,22 +181,22 @@ static short mmcr1_adder_bits[8] = {
  */
 
 static struct unitinfo {
-	u64 value, mask;
+	unsigned long value, mask;
 	int unit;
 	int lowerbit;
 } p4_unitinfo[16] = {
-	[PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 },
-	[PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
+	[PM_FPU] = { 0x44000000000000ul, 0x88000000000000ul, PM_FPU, 0 },
+	[PM_ISU1] = { 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
 	[PM_ISU1_ALT] =
-		{ 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 },
-	[PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
+		{ 0x20080000000000ul, 0x88000000000000ul, PM_ISU1, 0 },
+	[PM_IFU] = { 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
 	[PM_IFU_ALT] =
-		{ 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 },
-	[PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 },
-	[PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 },
-	[PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 },
-	[PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 },
-	[PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 }
+		{ 0x02200000000000ul, 0x08820000000000ul, PM_IFU, 41 },
+	[PM_IDU0] = { 0x10100000000000ul, 0x80840000000000ul, PM_IDU0, 1 },
+	[PM_ISU2] = { 0x10140000000000ul, 0x80840000000000ul, PM_ISU2, 0 },
+	[PM_LSU0] = { 0x01400000000000ul, 0x08800000000000ul, PM_LSU0, 0 },
+	[PM_LSU1] = { 0x00000000000000ul, 0x00010000000000ul, PM_LSU1, 40 },
+	[PM_GPS] = { 0x00000000000000ul, 0x00000000000000ul, PM_GPS, 0 }
 };
 
 static unsigned char direct_marked_event[8] = {
@@ -249,10 +251,11 @@ static int p4_marked_instr_event(u64 event)
 	return (mask >> (byte * 8 + bit)) & 1;
 }
 
-static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
+static int p4_get_constraint(u64 event, unsigned long *maskp,
+			     unsigned long *valp)
 {
 	int pmc, byte, unit, lower, sh;
-	u64 mask = 0, value = 0;
+	unsigned long mask = 0, value = 0;
 	int grp = -1;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
@@ -282,14 +285,14 @@ static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp)
 		value |= p4_unitinfo[unit].value;
 		sh = p4_unitinfo[unit].lowerbit;
 		if (sh > 1)
-			value |= (u64)lower << sh;
+			value |= (unsigned long)lower << sh;
 		else if (lower != sh)
 			return -1;
 		unit = p4_unitinfo[unit].unit;
 
 		/* Set byte lane select field */
 		mask |= 0xfULL << (28 - 4 * byte);
-		value |= (u64)unit << (28 - 4 * byte);
+		value |= (unsigned long)unit << (28 - 4 * byte);
 	}
 	if (grp == 0) {
 		/* increment PMC1/2/5/6 field */
@@ -353,9 +356,9 @@ static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[])
 }
 
 static int p4_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], u64 mmcr[])
+			   unsigned int hwc[], unsigned long mmcr[])
 {
-	u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0;
+	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel, lower;
 	unsigned int ttm, grp;
 	unsigned int pmc_inuse = 0;
@@ -429,9 +432,11 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 		return -1;
 
 	/* Set TTMxSEL fields.  Note, units 1-3 => TTM0SEL codes 0-2 */
-	mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH;
-	mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH;
-	mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH;
+	mmcr1 |= (unsigned long)(unituse[3] * 2 + unituse[2])
+		<< MMCR1_TTM0SEL_SH;
+	mmcr1 |= (unsigned long)(unituse[7] * 3 + unituse[6] * 2)
+		<< MMCR1_TTM1SEL_SH;
+	mmcr1 |= (unsigned long)unituse[9] << MMCR1_TTM2SEL_SH;
 
 	/* Set TTCxSEL fields. */
 	if (unitlower & 0xe)
@@ -456,7 +461,8 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 				ttm = unit - 1;		/* 2->1, 3->2 */
 			else
 				ttm = unit >> 2;
-			mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte);
+			mmcr1 |= (unsigned long)ttm
+				<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 		}
 	}
 
@@ -519,7 +525,7 @@ static int p4_compute_mmcr(u64 event[], int n_ev,
 	return 0;
 }
 
-static void p4_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void p4_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
 	/*
 	 * Setting the PMCxSEL field to 0 disables PMC x.
@@ -583,16 +589,27 @@ static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-struct power_pmu power4_pmu = {
-	.n_counter = 8,
-	.max_alternatives = 5,
-	.add_fields = 0x0000001100005555ull,
-	.test_adder = 0x0011083300000000ull,
-	.compute_mmcr = p4_compute_mmcr,
-	.get_constraint = p4_get_constraint,
-	.get_alternatives = p4_get_alternatives,
-	.disable_pmc = p4_disable_pmc,
-	.n_generic = ARRAY_SIZE(p4_generic_events),
-	.generic_events = p4_generic_events,
-	.cache_events = &power4_cache_events,
+static struct power_pmu power4_pmu = {
+	.name			= "POWER4/4+",
+	.n_counter		= 8,
+	.max_alternatives	= 5,
+	.add_fields		= 0x0000001100005555ul,
+	.test_adder		= 0x0011083300000000ul,
+	.compute_mmcr		= p4_compute_mmcr,
+	.get_constraint		= p4_get_constraint,
+	.get_alternatives	= p4_get_alternatives,
+	.disable_pmc		= p4_disable_pmc,
+	.n_generic		= ARRAY_SIZE(p4_generic_events),
+	.generic_events		= p4_generic_events,
+	.cache_events		= &power4_cache_events,
 };
+
+static int init_power4_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power4"))
+		return -ENODEV;
+
+	return register_power_pmu(&power4_pmu);
+}
+
+arch_initcall(init_power4_pmu);
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c
index 41e5d2d958d4..f4adca8e98a4 100644
--- a/arch/powerpc/kernel/power5+-pmu.c
+++ b/arch/powerpc/kernel/power5+-pmu.c
@@ -10,7 +10,9 @@
  */
 #include <linux/kernel.h>
 #include <linux/perf_counter.h>
+#include <linux/string.h>
 #include <asm/reg.h>
+#include <asm/cputable.h>
 
 /*
  * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3)
@@ -126,20 +128,21 @@ static const int grsel_shift[8] = {
 };
 
 /* Masks and values for using events from the various units */
-static u64 unit_cons[PM_LASTUNIT+1][2] = {
-	[PM_FPU] = { 0x3200000000ull, 0x0100000000ull },
-	[PM_ISU0] = { 0x0200000000ull, 0x0080000000ull },
-	[PM_ISU1] = { 0x3200000000ull, 0x3100000000ull },
-	[PM_IFU] = { 0x3200000000ull, 0x2100000000ull },
-	[PM_IDU] = { 0x0e00000000ull, 0x0040000000ull },
-	[PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull },
+static unsigned long unit_cons[PM_LASTUNIT+1][2] = {
+	[PM_FPU] = { 0x3200000000ul, 0x0100000000ul },
+	[PM_ISU0] = { 0x0200000000ul, 0x0080000000ul },
+	[PM_ISU1] = { 0x3200000000ul, 0x3100000000ul },
+	[PM_IFU] = { 0x3200000000ul, 0x2100000000ul },
+	[PM_IDU] = { 0x0e00000000ul, 0x0040000000ul },
+	[PM_GRS] = { 0x0e00000000ul, 0x0c40000000ul },
 };
 
-static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
+static int power5p_get_constraint(u64 event, unsigned long *maskp,
+				  unsigned long *valp)
 {
 	int pmc, byte, unit, sh;
 	int bit, fmask;
-	u64 mask = 0, value = 0;
+	unsigned long mask = 0, value = 0;
 
 	pmc = (event >> PM_PMC_SH) & PM_PMC_MSK;
 	if (pmc) {
@@ -171,17 +174,18 @@ static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp)
 			bit = event & 7;
 			fmask = (bit == 6)? 7: 3;
 			sh = grsel_shift[bit];
-			mask |= (u64)fmask << sh;
-			value |= (u64)((event >> PM_GRS_SH) & fmask) << sh;
+			mask |= (unsigned long)fmask << sh;
+			value |= (unsigned long)((event >> PM_GRS_SH) & fmask)
+				<< sh;
 		}
 		/* Set byte lane select field */
-		mask |= 0xfULL << (24 - 4 * byte);
-		value |= (u64)unit << (24 - 4 * byte);
+		mask |= 0xfUL << (24 - 4 * byte);
+		value |= (unsigned long)unit << (24 - 4 * byte);
 	}
 	if (pmc < 5) {
 		/* need a counter from PMC1-4 set */
-		mask |= 0x8000000000000ull;
-		value |= 0x1000000000000ull;
+		mask |= 0x8000000000000ul;
+		value |= 0x1000000000000ul;
 	}
 	*maskp = mask;
 	*valp = value;
@@ -452,10 +456,10 @@ static int power5p_marked_instr_event(u64 event)
 }
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], u64 mmcr[])
+				unsigned int hwc[], unsigned long mmcr[])
 {
-	u64 mmcr1 = 0;
-	u64 mmcra = 0;
+	unsigned long mmcr1 = 0;
+	unsigned long mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
 	unsigned int ttm;
 	int i, isbus, bit, grsel;
@@ -517,7 +521,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH;
+		mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH;
 	}
 	ttmuse = 0;
 	for (; i <= PM_GRS; ++i) {
@@ -525,7 +529,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			continue;
 		if (ttmuse++)
 			return -1;
-		mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH;
+		mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH;
 	}
 	if (ttmuse > 1)
 		return -1;
@@ -540,10 +544,11 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			unit = PM_ISU0_ALT;
 		} else if (unit == PM_LSU1 + 1) {
 			/* select lower word of LSU1 for this byte */
-			mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte);
+			mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte);
 		}
 		ttm = unit >> 2;
-		mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
+		mmcr1 |= (unsigned long)ttm
+			<< (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte);
 	}
 
 	/* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */
@@ -568,7 +573,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 			if (isbus && (byte & 2) &&
 			    (psel == 8 || psel == 0x10 || psel == 0x28))
 				/* add events on higher-numbered bus */
-				mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
+				mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc);
 		} else {
 			/* Instructions or run cycles on PMC5/6 */
 			--pmc;
@@ -576,7 +581,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 		if (isbus && unit == PM_GRS) {
 			bit = psel & 7;
 			grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK;
-			mmcr1 |= (u64)grsel << grsel_shift[bit];
+			mmcr1 |= (unsigned long)grsel << grsel_shift[bit];
 		}
 		if (power5p_marked_instr_event(event[i]))
 			mmcra |= MMCRA_SAMPLE_ENABLE;
@@ -599,7 +604,7 @@ static int power5p_compute_mmcr(u64 event[], int n_ev,
 	return 0;
 }
 
-static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[])
+static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
 {
 	if (pmc <= 3)
 		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
@@ -654,18 +659,30 @@ static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	},
 };
 
-struct power_pmu power5p_pmu = {
-	.n_counter = 6,
-	.max_alternatives = MAX_ALT,
-	.add_fields = 0x7000000000055ull,
-	.test_adder = 0x3000040000000ull,
-	.compute_mmcr = power5p_compute_mmcr,
-	.get_constraint = power5p_get_constraint,
-	.get_alternatives = power5p_get_alternatives,
-	.disable_pmc = power5p_disable_pmc,
-	.limited_pmc_event = power5p_limited_pmc_event,
-	.flags = PPMU_LIMITED_PMC5_6,
-	.n_generic = ARRAY_SIZE(power5p_generic_events),
-	.generic_events = power5p_generic_events,
-	.cache_events = &power5p_cache_events,
+static struct power_pmu power5p_pmu = {
+	.name			= "POWER5+/++",
+	.n_counter		= 6,
+	.max_alternatives	= MAX_ALT,
+	.add_fields		= 0x7000000000055ul,
+	.test_adder		= 0x3000040000000ul,
+	.compute_mmcr		= power5p_compute_mmcr,
+	.get_constraint		= power5p_get_constraint,
+	.get_alternatives	= power5p_get_alternatives,
+	.disable_pmc		= power5p_disable_pmc,
+	.limited_pmc_event	= power5p_limited_pmc_event,
+	.flags			= PPMU_LIMITED_PMC5_6,
+	.n_generic		= ARRAY_SIZE(power5p_generic_events),
+	.generic_events		= power5p_generic_events,
+	.cache_events		= &power5p_cache_events,
 };
+
+static int init_power5p_pmu(void)
+{
+	if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
682 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5++")) | ||
683 | return -ENODEV; | ||
684 | |||
685 | return register_power_pmu(&power5p_pmu); | ||
686 | } | ||
687 | |||
688 | arch_initcall(init_power5p_pmu); | ||
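Each PMU back-end above now registers itself at boot rather than being picked out of a central table in the common code, which is why every file gains <linux/string.h> and <asm/cputable.h>, and why the struct power_pmu can become static. A minimal sketch of the shape each back-end takes after this series (the "example" names are placeholders; register_power_pmu() and cur_cpu_spec are the common-code interfaces used in the hunks above):

    #include <linux/kernel.h>
    #include <linux/init.h>
    #include <linux/string.h>
    #include <linux/perf_counter.h>
    #include <asm/cputable.h>

    static struct power_pmu example_pmu = {
            .name           = "EXAMPLE",
            .n_counter      = 6,
            /* ... constraint masks and the compute_mmcr/get_constraint/
             *     get_alternatives/disable_pmc callbacks ... */
    };

    static int init_example_pmu(void)
    {
            /* Bind only on the matching CPU; every other back-end's
             * initcall returns -ENODEV and stays out of the way. */
            if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/example"))
                    return -ENODEV;

            return register_power_pmu(&example_pmu);
    }

    arch_initcall(init_example_pmu);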
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c index 05600b66221a..29b2c6c0e83a 100644 --- a/arch/powerpc/kernel/power5-pmu.c +++ b/arch/powerpc/kernel/power5-pmu.c | |||
@@ -10,7 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/perf_counter.h> | 12 | #include <linux/perf_counter.h> |
13 | #include <linux/string.h> | ||
13 | #include <asm/reg.h> | 14 | #include <asm/reg.h> |
15 | #include <asm/cputable.h> | ||
14 | 16 | ||
15 | /* | 17 | /* |
16 | * Bits in event code for POWER5 (not POWER5++) | 18 | * Bits in event code for POWER5 (not POWER5++) |
@@ -130,20 +132,21 @@ static const int grsel_shift[8] = { | |||
130 | }; | 132 | }; |
131 | 133 | ||
132 | /* Masks and values for using events from the various units */ | 134 | /* Masks and values for using events from the various units */ |
133 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | 135 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { |
134 | [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, | 136 | [PM_FPU] = { 0xc0002000000000ul, 0x00001000000000ul }, |
135 | [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, | 137 | [PM_ISU0] = { 0x00002000000000ul, 0x00000800000000ul }, |
136 | [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, | 138 | [PM_ISU1] = { 0xc0002000000000ul, 0xc0001000000000ul }, |
137 | [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, | 139 | [PM_IFU] = { 0xc0002000000000ul, 0x80001000000000ul }, |
138 | [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, | 140 | [PM_IDU] = { 0x30002000000000ul, 0x00000400000000ul }, |
139 | [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, | 141 | [PM_GRS] = { 0x30002000000000ul, 0x30000400000000ul }, |
140 | }; | 142 | }; |
141 | 143 | ||
142 | static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) | 144 | static int power5_get_constraint(u64 event, unsigned long *maskp, |
145 | unsigned long *valp) | ||
143 | { | 146 | { |
144 | int pmc, byte, unit, sh; | 147 | int pmc, byte, unit, sh; |
145 | int bit, fmask; | 148 | int bit, fmask; |
146 | u64 mask = 0, value = 0; | 149 | unsigned long mask = 0, value = 0; |
147 | int grp = -1; | 150 | int grp = -1; |
148 | 151 | ||
149 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | 152 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
@@ -178,8 +181,9 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) | |||
178 | bit = event & 7; | 181 | bit = event & 7; |
179 | fmask = (bit == 6)? 7: 3; | 182 | fmask = (bit == 6)? 7: 3; |
180 | sh = grsel_shift[bit]; | 183 | sh = grsel_shift[bit]; |
181 | mask |= (u64)fmask << sh; | 184 | mask |= (unsigned long)fmask << sh; |
182 | value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; | 185 | value |= (unsigned long)((event >> PM_GRS_SH) & fmask) |
186 | << sh; | ||
183 | } | 187 | } |
184 | /* | 188 | /* |
185 | * Bus events on bytes 0 and 2 can be counted | 189 | * Bus events on bytes 0 and 2 can be counted |
@@ -188,22 +192,22 @@ static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) | |||
188 | if (!pmc) | 192 | if (!pmc) |
189 | grp = byte & 1; | 193 | grp = byte & 1; |
190 | /* Set byte lane select field */ | 194 | /* Set byte lane select field */ |
191 | mask |= 0xfULL << (24 - 4 * byte); | 195 | mask |= 0xfUL << (24 - 4 * byte); |
192 | value |= (u64)unit << (24 - 4 * byte); | 196 | value |= (unsigned long)unit << (24 - 4 * byte); |
193 | } | 197 | } |
194 | if (grp == 0) { | 198 | if (grp == 0) { |
195 | /* increment PMC1/2 field */ | 199 | /* increment PMC1/2 field */ |
196 | mask |= 0x200000000ull; | 200 | mask |= 0x200000000ul; |
197 | value |= 0x080000000ull; | 201 | value |= 0x080000000ul; |
198 | } else if (grp == 1) { | 202 | } else if (grp == 1) { |
199 | /* increment PMC3/4 field */ | 203 | /* increment PMC3/4 field */ |
200 | mask |= 0x40000000ull; | 204 | mask |= 0x40000000ul; |
201 | value |= 0x10000000ull; | 205 | value |= 0x10000000ul; |
202 | } | 206 | } |
203 | if (pmc < 5) { | 207 | if (pmc < 5) { |
204 | /* need a counter from PMC1-4 set */ | 208 | /* need a counter from PMC1-4 set */ |
205 | mask |= 0x8000000000000ull; | 209 | mask |= 0x8000000000000ul; |
206 | value |= 0x1000000000000ull; | 210 | value |= 0x1000000000000ul; |
207 | } | 211 | } |
208 | *maskp = mask; | 212 | *maskp = mask; |
209 | *valp = value; | 213 | *valp = value; |
@@ -383,10 +387,10 @@ static int power5_marked_instr_event(u64 event) | |||
383 | } | 387 | } |
384 | 388 | ||
385 | static int power5_compute_mmcr(u64 event[], int n_ev, | 389 | static int power5_compute_mmcr(u64 event[], int n_ev, |
386 | unsigned int hwc[], u64 mmcr[]) | 390 | unsigned int hwc[], unsigned long mmcr[]) |
387 | { | 391 | { |
388 | u64 mmcr1 = 0; | 392 | unsigned long mmcr1 = 0; |
389 | u64 mmcra = 0; | 393 | unsigned long mmcra = 0; |
390 | unsigned int pmc, unit, byte, psel; | 394 | unsigned int pmc, unit, byte, psel; |
391 | unsigned int ttm, grp; | 395 | unsigned int ttm, grp; |
392 | int i, isbus, bit, grsel; | 396 | int i, isbus, bit, grsel; |
@@ -457,7 +461,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
457 | continue; | 461 | continue; |
458 | if (ttmuse++) | 462 | if (ttmuse++) |
459 | return -1; | 463 | return -1; |
460 | mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; | 464 | mmcr1 |= (unsigned long)i << MMCR1_TTM0SEL_SH; |
461 | } | 465 | } |
462 | ttmuse = 0; | 466 | ttmuse = 0; |
463 | for (; i <= PM_GRS; ++i) { | 467 | for (; i <= PM_GRS; ++i) { |
@@ -465,7 +469,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
465 | continue; | 469 | continue; |
466 | if (ttmuse++) | 470 | if (ttmuse++) |
467 | return -1; | 471 | return -1; |
468 | mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; | 472 | mmcr1 |= (unsigned long)(i & 3) << MMCR1_TTM1SEL_SH; |
469 | } | 473 | } |
470 | if (ttmuse > 1) | 474 | if (ttmuse > 1) |
471 | return -1; | 475 | return -1; |
@@ -480,10 +484,11 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
480 | unit = PM_ISU0_ALT; | 484 | unit = PM_ISU0_ALT; |
481 | } else if (unit == PM_LSU1 + 1) { | 485 | } else if (unit == PM_LSU1 + 1) { |
482 | /* select lower word of LSU1 for this byte */ | 486 | /* select lower word of LSU1 for this byte */ |
483 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | 487 | mmcr1 |= 1ul << (MMCR1_TTM3SEL_SH + 3 - byte); |
484 | } | 488 | } |
485 | ttm = unit >> 2; | 489 | ttm = unit >> 2; |
486 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | 490 | mmcr1 |= (unsigned long)ttm |
491 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
487 | } | 492 | } |
488 | 493 | ||
489 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | 494 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ |
@@ -513,7 +518,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
513 | --pmc; | 518 | --pmc; |
514 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) | 519 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) |
515 | /* add events on higher-numbered bus */ | 520 | /* add events on higher-numbered bus */ |
516 | mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | 521 | mmcr1 |= 1ul << (MMCR1_PMC1_ADDER_SEL_SH - pmc); |
517 | } else { | 522 | } else { |
518 | /* Instructions or run cycles on PMC5/6 */ | 523 | /* Instructions or run cycles on PMC5/6 */ |
519 | --pmc; | 524 | --pmc; |
@@ -521,7 +526,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
521 | if (isbus && unit == PM_GRS) { | 526 | if (isbus && unit == PM_GRS) { |
522 | bit = psel & 7; | 527 | bit = psel & 7; |
523 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | 528 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; |
524 | mmcr1 |= (u64)grsel << grsel_shift[bit]; | 529 | mmcr1 |= (unsigned long)grsel << grsel_shift[bit]; |
525 | } | 530 | } |
526 | if (power5_marked_instr_event(event[i])) | 531 | if (power5_marked_instr_event(event[i])) |
527 | mmcra |= MMCRA_SAMPLE_ENABLE; | 532 | mmcra |= MMCRA_SAMPLE_ENABLE; |
@@ -541,7 +546,7 @@ static int power5_compute_mmcr(u64 event[], int n_ev, | |||
541 | return 0; | 546 | return 0; |
542 | } | 547 | } |
543 | 548 | ||
544 | static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) | 549 | static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[]) |
545 | { | 550 | { |
546 | if (pmc <= 3) | 551 | if (pmc <= 3) |
547 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | 552 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); |
@@ -596,16 +601,27 @@ static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | |||
596 | }, | 601 | }, |
597 | }; | 602 | }; |
598 | 603 | ||
599 | struct power_pmu power5_pmu = { | 604 | static struct power_pmu power5_pmu = { |
600 | .n_counter = 6, | 605 | .name = "POWER5", |
601 | .max_alternatives = MAX_ALT, | 606 | .n_counter = 6, |
602 | .add_fields = 0x7000090000555ull, | 607 | .max_alternatives = MAX_ALT, |
603 | .test_adder = 0x3000490000000ull, | 608 | .add_fields = 0x7000090000555ul, |
604 | .compute_mmcr = power5_compute_mmcr, | 609 | .test_adder = 0x3000490000000ul, |
605 | .get_constraint = power5_get_constraint, | 610 | .compute_mmcr = power5_compute_mmcr, |
606 | .get_alternatives = power5_get_alternatives, | 611 | .get_constraint = power5_get_constraint, |
607 | .disable_pmc = power5_disable_pmc, | 612 | .get_alternatives = power5_get_alternatives, |
608 | .n_generic = ARRAY_SIZE(power5_generic_events), | 613 | .disable_pmc = power5_disable_pmc, |
609 | .generic_events = power5_generic_events, | 614 | .n_generic = ARRAY_SIZE(power5_generic_events), |
610 | .cache_events = &power5_cache_events, | 615 | .generic_events = power5_generic_events, |
616 | .cache_events = &power5_cache_events, | ||
611 | }; | 617 | }; |
618 | |||
619 | static int init_power5_pmu(void) | ||
620 | { | ||
621 | if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5")) | ||
622 | return -ENODEV; | ||
623 | |||
624 | return register_power_pmu(&power5_pmu); | ||
625 | } | ||
626 | |||
627 | arch_initcall(init_power5_pmu); | ||
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 46f74bebcfd9..09ae5bf5bda7 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c | |||
@@ -10,7 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/perf_counter.h> | 12 | #include <linux/perf_counter.h> |
13 | #include <linux/string.h> | ||
13 | #include <asm/reg.h> | 14 | #include <asm/reg.h> |
15 | #include <asm/cputable.h> | ||
14 | 16 | ||
15 | /* | 17 | /* |
16 | * Bits in event code for POWER6 | 18 | * Bits in event code for POWER6 |
@@ -41,9 +43,9 @@ | |||
41 | #define MMCR1_NESTSEL_SH 45 | 43 | #define MMCR1_NESTSEL_SH 45 |
42 | #define MMCR1_NESTSEL_MSK 0x7 | 44 | #define MMCR1_NESTSEL_MSK 0x7 |
43 | #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) | 45 | #define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) |
44 | #define MMCR1_PMC1_LLA ((u64)1 << 44) | 46 | #define MMCR1_PMC1_LLA (1ul << 44) |
45 | #define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) | 47 | #define MMCR1_PMC1_LLA_VALUE (1ul << 39) |
46 | #define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) | 48 | #define MMCR1_PMC1_ADDR_SEL (1ul << 35) |
47 | #define MMCR1_PMC1SEL_SH 24 | 49 | #define MMCR1_PMC1SEL_SH 24 |
48 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | 50 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) |
49 | #define MMCR1_PMCSEL_MSK 0xff | 51 | #define MMCR1_PMCSEL_MSK 0xff |
@@ -173,10 +175,10 @@ static int power6_marked_instr_event(u64 event) | |||
173 | * Assign PMC numbers and compute MMCR1 value for a set of events | 175 | * Assign PMC numbers and compute MMCR1 value for a set of events |
174 | */ | 176 | */ |
175 | static int p6_compute_mmcr(u64 event[], int n_ev, | 177 | static int p6_compute_mmcr(u64 event[], int n_ev, |
176 | unsigned int hwc[], u64 mmcr[]) | 178 | unsigned int hwc[], unsigned long mmcr[]) |
177 | { | 179 | { |
178 | u64 mmcr1 = 0; | 180 | unsigned long mmcr1 = 0; |
179 | u64 mmcra = 0; | 181 | unsigned long mmcra = 0; |
180 | int i; | 182 | int i; |
181 | unsigned int pmc, ev, b, u, s, psel; | 183 | unsigned int pmc, ev, b, u, s, psel; |
182 | unsigned int ttmset = 0; | 184 | unsigned int ttmset = 0; |
@@ -215,7 +217,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, | |||
215 | /* check for conflict on this byte of event bus */ | 217 | /* check for conflict on this byte of event bus */ |
216 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) | 218 | if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) |
217 | return -1; | 219 | return -1; |
218 | mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); | 220 | mmcr1 |= (unsigned long)u << MMCR1_TTMSEL_SH(b); |
219 | ttmset |= 1 << b; | 221 | ttmset |= 1 << b; |
220 | if (u == 5) { | 222 | if (u == 5) { |
221 | /* Nest events have a further mux */ | 223 | /* Nest events have a further mux */ |
@@ -224,7 +226,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, | |||
224 | MMCR1_NESTSEL(mmcr1) != s) | 226 | MMCR1_NESTSEL(mmcr1) != s) |
225 | return -1; | 227 | return -1; |
226 | ttmset |= 0x10; | 228 | ttmset |= 0x10; |
227 | mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; | 229 | mmcr1 |= (unsigned long)s << MMCR1_NESTSEL_SH; |
228 | } | 230 | } |
229 | if (0x30 <= psel && psel <= 0x3d) { | 231 | if (0x30 <= psel && psel <= 0x3d) { |
230 | /* these need the PMCx_ADDR_SEL bits */ | 232 | /* these need the PMCx_ADDR_SEL bits */ |
@@ -243,7 +245,7 @@ static int p6_compute_mmcr(u64 event[], int n_ev, | |||
243 | if (power6_marked_instr_event(event[i])) | 245 | if (power6_marked_instr_event(event[i])) |
244 | mmcra |= MMCRA_SAMPLE_ENABLE; | 246 | mmcra |= MMCRA_SAMPLE_ENABLE; |
245 | if (pmc < 4) | 247 | if (pmc < 4) |
246 | mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); | 248 | mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc); |
247 | } | 249 | } |
248 | mmcr[0] = 0; | 250 | mmcr[0] = 0; |
249 | if (pmc_inuse & 1) | 251 | if (pmc_inuse & 1) |
@@ -265,10 +267,11 @@ static int p6_compute_mmcr(u64 event[], int n_ev, | |||
265 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | 267 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 |
266 | * 32-34 select field: nest (subunit) event selector | 268 | * 32-34 select field: nest (subunit) event selector |
267 | */ | 269 | */ |
268 | static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) | 270 | static int p6_get_constraint(u64 event, unsigned long *maskp, |
271 | unsigned long *valp) | ||
269 | { | 272 | { |
270 | int pmc, byte, sh, subunit; | 273 | int pmc, byte, sh, subunit; |
271 | u64 mask = 0, value = 0; | 274 | unsigned long mask = 0, value = 0; |
272 | 275 | ||
273 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | 276 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
274 | if (pmc) { | 277 | if (pmc) { |
@@ -282,11 +285,11 @@ static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) | |||
282 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | 285 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; |
283 | sh = byte * 4 + (16 - PM_UNIT_SH); | 286 | sh = byte * 4 + (16 - PM_UNIT_SH); |
284 | mask |= PM_UNIT_MSKS << sh; | 287 | mask |= PM_UNIT_MSKS << sh; |
285 | value |= (u64)(event & PM_UNIT_MSKS) << sh; | 288 | value |= (unsigned long)(event & PM_UNIT_MSKS) << sh; |
286 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | 289 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { |
287 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | 290 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; |
288 | mask |= (u64)PM_SUBUNIT_MSK << 32; | 291 | mask |= (unsigned long)PM_SUBUNIT_MSK << 32; |
289 | value |= (u64)subunit << 32; | 292 | value |= (unsigned long)subunit << 32; |
290 | } | 293 | } |
291 | } | 294 | } |
292 | if (pmc <= 4) { | 295 | if (pmc <= 4) { |
@@ -458,7 +461,7 @@ static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | |||
458 | return nalt; | 461 | return nalt; |
459 | } | 462 | } |
460 | 463 | ||
461 | static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) | 464 | static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[]) |
462 | { | 465 | { |
463 | /* Set PMCxSEL to 0 to disable PMCx */ | 466 | /* Set PMCxSEL to 0 to disable PMCx */ |
464 | if (pmc <= 3) | 467 | if (pmc <= 3) |
@@ -515,18 +518,29 @@ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | |||
515 | }, | 518 | }, |
516 | }; | 519 | }; |
517 | 520 | ||
518 | struct power_pmu power6_pmu = { | 521 | static struct power_pmu power6_pmu = { |
519 | .n_counter = 6, | 522 | .name = "POWER6", |
520 | .max_alternatives = MAX_ALT, | 523 | .n_counter = 6, |
521 | .add_fields = 0x1555, | 524 | .max_alternatives = MAX_ALT, |
522 | .test_adder = 0x3000, | 525 | .add_fields = 0x1555, |
523 | .compute_mmcr = p6_compute_mmcr, | 526 | .test_adder = 0x3000, |
524 | .get_constraint = p6_get_constraint, | 527 | .compute_mmcr = p6_compute_mmcr, |
525 | .get_alternatives = p6_get_alternatives, | 528 | .get_constraint = p6_get_constraint, |
526 | .disable_pmc = p6_disable_pmc, | 529 | .get_alternatives = p6_get_alternatives, |
527 | .limited_pmc_event = p6_limited_pmc_event, | 530 | .disable_pmc = p6_disable_pmc, |
528 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, | 531 | .limited_pmc_event = p6_limited_pmc_event, |
529 | .n_generic = ARRAY_SIZE(power6_generic_events), | 532 | .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, |
530 | .generic_events = power6_generic_events, | 533 | .n_generic = ARRAY_SIZE(power6_generic_events), |
531 | .cache_events = &power6_cache_events, | 534 | .generic_events = power6_generic_events, |
535 | .cache_events = &power6_cache_events, | ||
532 | }; | 536 | }; |
537 | |||
538 | static int init_power6_pmu(void) | ||
539 | { | ||
540 | if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6")) | ||
541 | return -ENODEV; | ||
542 | |||
543 | return register_power_pmu(&power6_pmu); | ||
544 | } | ||
545 | |||
546 | arch_initcall(init_power6_pmu); | ||
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index b72e7a19d054..5d755ef7ac8f 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c | |||
@@ -10,7 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/perf_counter.h> | 12 | #include <linux/perf_counter.h> |
13 | #include <linux/string.h> | ||
13 | #include <asm/reg.h> | 14 | #include <asm/reg.h> |
15 | #include <asm/cputable.h> | ||
14 | 16 | ||
15 | /* | 17 | /* |
16 | * Bits in event code for POWER7 | 18 | * Bits in event code for POWER7 |
@@ -71,10 +73,11 @@ | |||
71 | * 0-9: Count of events needing PMC1..PMC5 | 73 | * 0-9: Count of events needing PMC1..PMC5 |
72 | */ | 74 | */ |
73 | 75 | ||
74 | static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) | 76 | static int power7_get_constraint(u64 event, unsigned long *maskp, |
77 | unsigned long *valp) | ||
75 | { | 78 | { |
76 | int pmc, sh; | 79 | int pmc, sh; |
77 | u64 mask = 0, value = 0; | 80 | unsigned long mask = 0, value = 0; |
78 | 81 | ||
79 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | 82 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
80 | if (pmc) { | 83 | if (pmc) { |
@@ -224,10 +227,10 @@ static int power7_marked_instr_event(u64 event) | |||
224 | } | 227 | } |
225 | 228 | ||
226 | static int power7_compute_mmcr(u64 event[], int n_ev, | 229 | static int power7_compute_mmcr(u64 event[], int n_ev, |
227 | unsigned int hwc[], u64 mmcr[]) | 230 | unsigned int hwc[], unsigned long mmcr[]) |
228 | { | 231 | { |
229 | u64 mmcr1 = 0; | 232 | unsigned long mmcr1 = 0; |
230 | u64 mmcra = 0; | 233 | unsigned long mmcra = 0; |
231 | unsigned int pmc, unit, combine, l2sel, psel; | 234 | unsigned int pmc, unit, combine, l2sel, psel; |
232 | unsigned int pmc_inuse = 0; | 235 | unsigned int pmc_inuse = 0; |
233 | int i; | 236 | int i; |
@@ -265,11 +268,14 @@ static int power7_compute_mmcr(u64 event[], int n_ev, | |||
265 | --pmc; | 268 | --pmc; |
266 | } | 269 | } |
267 | if (pmc <= 3) { | 270 | if (pmc <= 3) { |
268 | mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); | 271 | mmcr1 |= (unsigned long) unit |
269 | mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); | 272 | << (MMCR1_TTM0SEL_SH - 4 * pmc); |
273 | mmcr1 |= (unsigned long) combine | ||
274 | << (MMCR1_PMC1_COMBINE_SH - pmc); | ||
270 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | 275 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); |
271 | if (unit == 6) /* L2 events */ | 276 | if (unit == 6) /* L2 events */ |
272 | mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; | 277 | mmcr1 |= (unsigned long) l2sel |
278 | << MMCR1_L2SEL_SH; | ||
273 | } | 279 | } |
274 | if (power7_marked_instr_event(event[i])) | 280 | if (power7_marked_instr_event(event[i])) |
275 | mmcra |= MMCRA_SAMPLE_ENABLE; | 281 | mmcra |= MMCRA_SAMPLE_ENABLE; |
@@ -287,10 +293,10 @@ static int power7_compute_mmcr(u64 event[], int n_ev, | |||
287 | return 0; | 293 | return 0; |
288 | } | 294 | } |
289 | 295 | ||
290 | static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) | 296 | static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) |
291 | { | 297 | { |
292 | if (pmc <= 3) | 298 | if (pmc <= 3) |
293 | mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); | 299 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); |
294 | } | 300 | } |
295 | 301 | ||
296 | static int power7_generic_events[] = { | 302 | static int power7_generic_events[] = { |
@@ -342,16 +348,27 @@ static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | |||
342 | }, | 348 | }, |
343 | }; | 349 | }; |
344 | 350 | ||
345 | struct power_pmu power7_pmu = { | 351 | static struct power_pmu power7_pmu = { |
346 | .n_counter = 6, | 352 | .name = "POWER7", |
347 | .max_alternatives = MAX_ALT + 1, | 353 | .n_counter = 6, |
348 | .add_fields = 0x1555ull, | 354 | .max_alternatives = MAX_ALT + 1, |
349 | .test_adder = 0x3000ull, | 355 | .add_fields = 0x1555ul, |
350 | .compute_mmcr = power7_compute_mmcr, | 356 | .test_adder = 0x3000ul, |
351 | .get_constraint = power7_get_constraint, | 357 | .compute_mmcr = power7_compute_mmcr, |
352 | .get_alternatives = power7_get_alternatives, | 358 | .get_constraint = power7_get_constraint, |
353 | .disable_pmc = power7_disable_pmc, | 359 | .get_alternatives = power7_get_alternatives, |
354 | .n_generic = ARRAY_SIZE(power7_generic_events), | 360 | .disable_pmc = power7_disable_pmc, |
355 | .generic_events = power7_generic_events, | 361 | .n_generic = ARRAY_SIZE(power7_generic_events), |
356 | .cache_events = &power7_cache_events, | 362 | .generic_events = power7_generic_events, |
363 | .cache_events = &power7_cache_events, | ||
357 | }; | 364 | }; |
365 | |||
366 | static int init_power7_pmu(void) | ||
367 | { | ||
368 | if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7")) | ||
369 | return -ENODEV; | ||
370 | |||
371 | return register_power_pmu(&power7_pmu); | ||
372 | } | ||
373 | |||
374 | arch_initcall(init_power7_pmu); | ||
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c index ba0a357a89f4..6637c87fe70e 100644 --- a/arch/powerpc/kernel/ppc970-pmu.c +++ b/arch/powerpc/kernel/ppc970-pmu.c | |||
@@ -10,7 +10,9 @@ | |||
10 | */ | 10 | */ |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/perf_counter.h> | 12 | #include <linux/perf_counter.h> |
13 | #include <linux/string.h> | ||
13 | #include <asm/reg.h> | 14 | #include <asm/reg.h> |
15 | #include <asm/cputable.h> | ||
14 | 16 | ||
15 | /* | 17 | /* |
16 | * Bits in event code for PPC970 | 18 | * Bits in event code for PPC970 |
@@ -183,7 +185,7 @@ static int p970_marked_instr_event(u64 event) | |||
183 | } | 185 | } |
184 | 186 | ||
185 | /* Masks and values for using events from the various units */ | 187 | /* Masks and values for using events from the various units */ |
186 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | 188 | static unsigned long unit_cons[PM_LASTUNIT+1][2] = { |
187 | [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, | 189 | [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, |
188 | [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, | 190 | [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, |
189 | [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, | 191 | [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, |
@@ -192,10 +194,11 @@ static u64 unit_cons[PM_LASTUNIT+1][2] = { | |||
192 | [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, | 194 | [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, |
193 | }; | 195 | }; |
194 | 196 | ||
195 | static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) | 197 | static int p970_get_constraint(u64 event, unsigned long *maskp, |
198 | unsigned long *valp) | ||
196 | { | 199 | { |
197 | int pmc, byte, unit, sh, spcsel; | 200 | int pmc, byte, unit, sh, spcsel; |
198 | u64 mask = 0, value = 0; | 201 | unsigned long mask = 0, value = 0; |
199 | int grp = -1; | 202 | int grp = -1; |
200 | 203 | ||
201 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | 204 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; |
@@ -222,7 +225,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) | |||
222 | grp = byte & 1; | 225 | grp = byte & 1; |
223 | /* Set byte lane select field */ | 226 | /* Set byte lane select field */ |
224 | mask |= 0xfULL << (28 - 4 * byte); | 227 | mask |= 0xfULL << (28 - 4 * byte); |
225 | value |= (u64)unit << (28 - 4 * byte); | 228 | value |= (unsigned long)unit << (28 - 4 * byte); |
226 | } | 229 | } |
227 | if (grp == 0) { | 230 | if (grp == 0) { |
228 | /* increment PMC1/2/5/6 field */ | 231 | /* increment PMC1/2/5/6 field */ |
@@ -236,7 +239,7 @@ static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) | |||
236 | spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | 239 | spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; |
237 | if (spcsel) { | 240 | if (spcsel) { |
238 | mask |= 3ull << 48; | 241 | mask |= 3ull << 48; |
239 | value |= (u64)spcsel << 48; | 242 | value |= (unsigned long)spcsel << 48; |
240 | } | 243 | } |
241 | *maskp = mask; | 244 | *maskp = mask; |
242 | *valp = value; | 245 | *valp = value; |
@@ -257,9 +260,9 @@ static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | |||
257 | } | 260 | } |
258 | 261 | ||
259 | static int p970_compute_mmcr(u64 event[], int n_ev, | 262 | static int p970_compute_mmcr(u64 event[], int n_ev, |
260 | unsigned int hwc[], u64 mmcr[]) | 263 | unsigned int hwc[], unsigned long mmcr[]) |
261 | { | 264 | { |
262 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | 265 | unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0; |
263 | unsigned int pmc, unit, byte, psel; | 266 | unsigned int pmc, unit, byte, psel; |
264 | unsigned int ttm, grp; | 267 | unsigned int ttm, grp; |
265 | unsigned int pmc_inuse = 0; | 268 | unsigned int pmc_inuse = 0; |
@@ -320,7 +323,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, | |||
320 | continue; | 323 | continue; |
321 | ttm = unitmap[i]; | 324 | ttm = unitmap[i]; |
322 | ++ttmuse[(ttm >> 2) & 1]; | 325 | ++ttmuse[(ttm >> 2) & 1]; |
323 | mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; | 326 | mmcr1 |= (unsigned long)(ttm & ~4) << MMCR1_TTM1SEL_SH; |
324 | } | 327 | } |
325 | /* Check only one unit per TTMx */ | 328 | /* Check only one unit per TTMx */ |
326 | if (ttmuse[0] > 1 || ttmuse[1] > 1) | 329 | if (ttmuse[0] > 1 || ttmuse[1] > 1) |
@@ -340,7 +343,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, | |||
340 | if (unit == PM_LSU1L && byte >= 2) | 343 | if (unit == PM_LSU1L && byte >= 2) |
341 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | 344 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); |
342 | } | 345 | } |
343 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | 346 | mmcr1 |= (unsigned long)ttm |
347 | << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
344 | } | 348 | } |
345 | 349 | ||
346 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | 350 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ |
@@ -386,7 +390,8 @@ static int p970_compute_mmcr(u64 event[], int n_ev, | |||
386 | for (pmc = 0; pmc < 2; ++pmc) | 390 | for (pmc = 0; pmc < 2; ++pmc) |
387 | mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); | 391 | mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); |
388 | for (; pmc < 8; ++pmc) | 392 | for (; pmc < 8; ++pmc) |
389 | mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | 393 | mmcr1 |= (unsigned long)pmcsel[pmc] |
394 | << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
390 | if (pmc_inuse & 1) | 395 | if (pmc_inuse & 1) |
391 | mmcr0 |= MMCR0_PMC1CE; | 396 | mmcr0 |= MMCR0_PMC1CE; |
392 | if (pmc_inuse & 0xfe) | 397 | if (pmc_inuse & 0xfe) |
@@ -401,7 +406,7 @@ static int p970_compute_mmcr(u64 event[], int n_ev, | |||
401 | return 0; | 406 | return 0; |
402 | } | 407 | } |
403 | 408 | ||
404 | static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) | 409 | static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[]) |
405 | { | 410 | { |
406 | int shift, i; | 411 | int shift, i; |
407 | 412 | ||
@@ -467,16 +472,28 @@ static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | |||
467 | }, | 472 | }, |
468 | }; | 473 | }; |
469 | 474 | ||
470 | struct power_pmu ppc970_pmu = { | 475 | static struct power_pmu ppc970_pmu = { |
471 | .n_counter = 8, | 476 | .name = "PPC970/FX/MP", |
472 | .max_alternatives = 2, | 477 | .n_counter = 8, |
473 | .add_fields = 0x001100005555ull, | 478 | .max_alternatives = 2, |
474 | .test_adder = 0x013300000000ull, | 479 | .add_fields = 0x001100005555ull, |
475 | .compute_mmcr = p970_compute_mmcr, | 480 | .test_adder = 0x013300000000ull, |
476 | .get_constraint = p970_get_constraint, | 481 | .compute_mmcr = p970_compute_mmcr, |
477 | .get_alternatives = p970_get_alternatives, | 482 | .get_constraint = p970_get_constraint, |
478 | .disable_pmc = p970_disable_pmc, | 483 | .get_alternatives = p970_get_alternatives, |
479 | .n_generic = ARRAY_SIZE(ppc970_generic_events), | 484 | .disable_pmc = p970_disable_pmc, |
480 | .generic_events = ppc970_generic_events, | 485 | .n_generic = ARRAY_SIZE(ppc970_generic_events), |
481 | .cache_events = &ppc970_cache_events, | 486 | .generic_events = ppc970_generic_events, |
487 | .cache_events = &ppc970_cache_events, | ||
482 | }; | 488 | }; |
489 | |||
490 | static int init_ppc970_pmu(void) | ||
491 | { | ||
492 | if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970") | ||
493 | && strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970MP")) | ||
494 | return -ENODEV; | ||
495 | |||
496 | return register_power_pmu(&ppc970_pmu); | ||
497 | } | ||
498 | |||
499 | arch_initcall(init_ppc970_pmu); | ||
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 15391c2ab013..eae4511ceeac 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/posix-timers.h> | 53 | #include <linux/posix-timers.h> |
54 | #include <linux/irq.h> | 54 | #include <linux/irq.h> |
55 | #include <linux/delay.h> | 55 | #include <linux/delay.h> |
56 | #include <linux/perf_counter.h> | ||
56 | 57 | ||
57 | #include <asm/io.h> | 58 | #include <asm/io.h> |
58 | #include <asm/processor.h> | 59 | #include <asm/processor.h> |
@@ -525,6 +526,26 @@ void __init iSeries_time_init_early(void) | |||
525 | } | 526 | } |
526 | #endif /* CONFIG_PPC_ISERIES */ | 527 | #endif /* CONFIG_PPC_ISERIES */ |
527 | 528 | ||
529 | #if defined(CONFIG_PERF_COUNTERS) && defined(CONFIG_PPC32) | ||
530 | DEFINE_PER_CPU(u8, perf_counter_pending); | ||
531 | |||
532 | void set_perf_counter_pending(void) | ||
533 | { | ||
534 | get_cpu_var(perf_counter_pending) = 1; | ||
535 | set_dec(1); | ||
536 | put_cpu_var(perf_counter_pending); | ||
537 | } | ||
538 | |||
539 | #define test_perf_counter_pending() __get_cpu_var(perf_counter_pending) | ||
540 | #define clear_perf_counter_pending() __get_cpu_var(perf_counter_pending) = 0 | ||
541 | |||
542 | #else /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ | ||
543 | |||
544 | #define test_perf_counter_pending() 0 | ||
545 | #define clear_perf_counter_pending() | ||
546 | |||
547 | #endif /* CONFIG_PERF_COUNTERS && CONFIG_PPC32 */ | ||
548 | |||
528 | /* | 549 | /* |
529 | * For iSeries shared processors, we have to let the hypervisor | 550 | * For iSeries shared processors, we have to let the hypervisor |
530 | * set the hardware decrementer. We set a virtual decrementer | 551 | * set the hardware decrementer. We set a virtual decrementer |
@@ -551,6 +572,10 @@ void timer_interrupt(struct pt_regs * regs) | |||
551 | set_dec(DECREMENTER_MAX); | 572 | set_dec(DECREMENTER_MAX); |
552 | 573 | ||
553 | #ifdef CONFIG_PPC32 | 574 | #ifdef CONFIG_PPC32 |
575 | if (test_perf_counter_pending()) { | ||
576 | clear_perf_counter_pending(); | ||
577 | perf_counter_do_pending(); | ||
578 | } | ||
554 | if (atomic_read(&ppc_n_lost_interrupts) != 0) | 579 | if (atomic_read(&ppc_n_lost_interrupts) != 0) |
555 | do_IRQ(regs); | 580 | do_IRQ(regs); |
556 | #endif | 581 | #endif |
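32-bit powerpc has no paca flag for lazy interrupt replay, so the hunk above drives pending perf work through the decrementer instead: set_perf_counter_pending() raises a per-CPU flag and arms the decrementer to 1 so that timer_interrupt() runs almost immediately and consumes the flag. A condensed sketch of that round trip, with set_dec() and perf_counter_do_pending() as used above:

    DEFINE_PER_CPU(u8, perf_counter_pending);

    void set_perf_counter_pending(void)
    {
            get_cpu_var(perf_counter_pending) = 1;
            set_dec(1);                     /* decrementer fires in ~one tick */
            put_cpu_var(perf_counter_pending);
    }

    void timer_interrupt(struct pt_regs *regs)
    {
            set_dec(DECREMENTER_MAX);       /* re-arm the normal tick */

            if (__get_cpu_var(perf_counter_pending)) {
                    __get_cpu_var(perf_counter_pending) = 0;
                    perf_counter_do_pending();      /* deferred wakeups */
            }
            /* ... usual timer and lost-interrupt processing ... */
    }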
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index c4192542b809..61187bec7506 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -1,7 +1,7 @@ | |||
1 | config PPC64 | 1 | config PPC64 |
2 | bool "64-bit kernel" | 2 | bool "64-bit kernel" |
3 | default n | 3 | default n |
4 | select HAVE_PERF_COUNTERS | 4 | select PPC_HAVE_PMU_SUPPORT |
5 | help | 5 | help |
6 | This option selects whether a 32-bit or a 64-bit kernel | 6 | This option selects whether a 32-bit or a 64-bit kernel |
7 | will be built. | 7 | will be built. |
@@ -78,6 +78,7 @@ config POWER4_ONLY | |||
78 | config 6xx | 78 | config 6xx |
79 | def_bool y | 79 | def_bool y |
80 | depends on PPC32 && PPC_BOOK3S | 80 | depends on PPC32 && PPC_BOOK3S |
81 | select PPC_HAVE_PMU_SUPPORT | ||
81 | 82 | ||
82 | config POWER3 | 83 | config POWER3 |
83 | bool | 84 | bool |
@@ -246,6 +247,15 @@ config VIRT_CPU_ACCOUNTING | |||
246 | 247 | ||
247 | If in doubt, say Y here. | 248 | If in doubt, say Y here. |
248 | 249 | ||
250 | config PPC_HAVE_PMU_SUPPORT | ||
251 | bool | ||
252 | |||
253 | config PPC_PERF_CTRS | ||
254 | def_bool y | ||
255 | depends on PERF_COUNTERS && PPC_HAVE_PMU_SUPPORT | ||
256 | help | ||
257 | This enables the powerpc-specific perf_counter back-end. | ||
258 | |||
249 | config SMP | 259 | config SMP |
250 | depends on PPC_STD_MMU || FSL_BOOKE | 260 | depends on PPC_STD_MMU || FSL_BOOKE |
251 | bool "Symmetric multi-processing support" | 261 | bool "Symmetric multi-processing support" |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h index 876ed97147b3..5fb33e160ea0 100644 --- a/arch/x86/include/asm/perf_counter.h +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -84,11 +84,6 @@ union cpuid10_edx { | |||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | 84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b |
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | 85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) |
86 | 86 | ||
87 | extern void set_perf_counter_pending(void); | ||
88 | |||
89 | #define clear_perf_counter_pending() do { } while (0) | ||
90 | #define test_perf_counter_pending() (0) | ||
91 | |||
92 | #ifdef CONFIG_PERF_COUNTERS | 87 | #ifdef CONFIG_PERF_COUNTERS |
93 | extern void init_hw_perf_counters(void); | 88 | extern void init_hw_perf_counters(void); |
94 | extern void perf_counters_lapic_init(void); | 89 | extern void perf_counters_lapic_init(void); |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a2..01fd9461d323 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | |||
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | #if defined(CONFIG_HIGHPTE) | 51 | #if defined(CONFIG_HIGHPTE) |
52 | #define __KM_PTE \ | ||
53 | (in_nmi() ? KM_NMI_PTE : \ | ||
54 | in_irq() ? KM_IRQ_PTE : \ | ||
55 | KM_PTE0) | ||
52 | #define pte_offset_map(dir, address) \ | 56 | #define pte_offset_map(dir, address) \ |
53 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ | 57 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ |
54 | pte_index((address))) | 58 | pte_index((address))) |
55 | #define pte_offset_map_nested(dir, address) \ | 59 | #define pte_offset_map_nested(dir, address) \ |
56 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ | 60 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ |
57 | pte_index((address))) | 61 | pte_index((address))) |
58 | #define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) | 62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) |
59 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) | 63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) |
60 | #else | 64 | #else |
61 | #define pte_offset_map(dir, address) \ | 65 | #define pte_offset_map(dir, address) \ |
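kmap_atomic() slots are fixed per type, so a pte mapped with KM_PTE0 in process context would be silently reused if an NMI on the same CPU (for instance the perf callchain walker going through the new fast-gup path) also mapped a pte. Selecting the slot by context closes that window; an illustrative fragment, assuming the KM_IRQ_PTE/KM_NMI_PTE types added in kmap_types.h below:

    static pte_t example_read_pte(pmd_t *pmd, unsigned long addr)
    {
            /* process context: __KM_PTE evaluates to KM_PTE0 */
            pte_t *ptep = pte_offset_map(pmd, addr);
            pte_t pte = *ptep;

            /*
             * An NMI arriving here that also maps a pte now picks
             * KM_NMI_PTE (in_nmi() is true), so this KM_PTE0 mapping
             * survives; IRQ context gets KM_IRQ_PTE the same way.
             */

            pte_unmap(ptep);        /* releases whatever __KM_PTE chose */
            return pte;
    }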
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5c..512ee87062c2 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -25,7 +25,12 @@ | |||
25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) | 25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) |
26 | 26 | ||
27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) | 27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) |
28 | #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) | 28 | |
29 | #ifdef CONFIG_X86_32 | ||
30 | # define USER_DS MAKE_MM_SEG(PAGE_OFFSET) | ||
31 | #else | ||
32 | # define USER_DS MAKE_MM_SEG(__VIRTUAL_MASK) | ||
33 | #endif | ||
29 | 34 | ||
30 | #define get_ds() (KERNEL_DS) | 35 | #define get_ds() (KERNEL_DS) |
31 | #define get_fs() (current_thread_info()->addr_limit) | 36 | #define get_fs() (current_thread_info()->addr_limit) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 275bc142cd5d..76dfef23f789 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/kdebug.h> | 19 | #include <linux/kdebug.h> |
20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | ||
22 | 23 | ||
23 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
24 | #include <asm/stacktrace.h> | 25 | #include <asm/stacktrace.h> |
@@ -389,23 +390,23 @@ static u64 intel_pmu_raw_event(u64 event) | |||
389 | return event & CORE_EVNTSEL_MASK; | 390 | return event & CORE_EVNTSEL_MASK; |
390 | } | 391 | } |
391 | 392 | ||
392 | static const u64 amd_0f_hw_cache_event_ids | 393 | static const u64 amd_hw_cache_event_ids |
393 | [PERF_COUNT_HW_CACHE_MAX] | 394 | [PERF_COUNT_HW_CACHE_MAX] |
394 | [PERF_COUNT_HW_CACHE_OP_MAX] | 395 | [PERF_COUNT_HW_CACHE_OP_MAX] |
395 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | 396 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
396 | { | 397 | { |
397 | [ C(L1D) ] = { | 398 | [ C(L1D) ] = { |
398 | [ C(OP_READ) ] = { | 399 | [ C(OP_READ) ] = { |
399 | [ C(RESULT_ACCESS) ] = 0, | 400 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
400 | [ C(RESULT_MISS) ] = 0, | 401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ |
401 | }, | 402 | }, |
402 | [ C(OP_WRITE) ] = { | 403 | [ C(OP_WRITE) ] = { |
403 | [ C(RESULT_ACCESS) ] = 0, | 404 | [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ |
404 | [ C(RESULT_MISS) ] = 0, | 405 | [ C(RESULT_MISS) ] = 0, |
405 | }, | 406 | }, |
406 | [ C(OP_PREFETCH) ] = { | 407 | [ C(OP_PREFETCH) ] = { |
407 | [ C(RESULT_ACCESS) ] = 0, | 408 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ |
408 | [ C(RESULT_MISS) ] = 0, | 409 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ |
409 | }, | 410 | }, |
410 | }, | 411 | }, |
411 | [ C(L1I ) ] = { | 412 | [ C(L1I ) ] = { |
@@ -418,17 +419,17 @@ static const u64 amd_0f_hw_cache_event_ids | |||
418 | [ C(RESULT_MISS) ] = -1, | 419 | [ C(RESULT_MISS) ] = -1, |
419 | }, | 420 | }, |
420 | [ C(OP_PREFETCH) ] = { | 421 | [ C(OP_PREFETCH) ] = { |
421 | [ C(RESULT_ACCESS) ] = 0, | 422 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ |
422 | [ C(RESULT_MISS) ] = 0, | 423 | [ C(RESULT_MISS) ] = 0, |
423 | }, | 424 | }, |
424 | }, | 425 | }, |
425 | [ C(LL ) ] = { | 426 | [ C(LL ) ] = { |
426 | [ C(OP_READ) ] = { | 427 | [ C(OP_READ) ] = { |
427 | [ C(RESULT_ACCESS) ] = 0, | 428 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ |
428 | [ C(RESULT_MISS) ] = 0, | 429 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ |
429 | }, | 430 | }, |
430 | [ C(OP_WRITE) ] = { | 431 | [ C(OP_WRITE) ] = { |
431 | [ C(RESULT_ACCESS) ] = 0, | 432 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ |
432 | [ C(RESULT_MISS) ] = 0, | 433 | [ C(RESULT_MISS) ] = 0, |
433 | }, | 434 | }, |
434 | [ C(OP_PREFETCH) ] = { | 435 | [ C(OP_PREFETCH) ] = { |
@@ -438,8 +439,8 @@ static const u64 amd_0f_hw_cache_event_ids | |||
438 | }, | 439 | }, |
439 | [ C(DTLB) ] = { | 440 | [ C(DTLB) ] = { |
440 | [ C(OP_READ) ] = { | 441 | [ C(OP_READ) ] = { |
441 | [ C(RESULT_ACCESS) ] = 0, | 442 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ |
442 | [ C(RESULT_MISS) ] = 0, | 443 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ |
443 | }, | 444 | }, |
444 | [ C(OP_WRITE) ] = { | 445 | [ C(OP_WRITE) ] = { |
445 | [ C(RESULT_ACCESS) ] = 0, | 446 | [ C(RESULT_ACCESS) ] = 0, |
@@ -1223,6 +1224,8 @@ again: | |||
1223 | if (!intel_pmu_save_and_restart(counter)) | 1224 | if (!intel_pmu_save_and_restart(counter)) |
1224 | continue; | 1225 | continue; |
1225 | 1226 | ||
1227 | data.period = counter->hw.last_period; | ||
1228 | |||
1226 | if (perf_counter_overflow(counter, 1, &data)) | 1229 | if (perf_counter_overflow(counter, 1, &data)) |
1227 | intel_pmu_disable_counter(&counter->hw, bit); | 1230 | intel_pmu_disable_counter(&counter->hw, bit); |
1228 | } | 1231 | } |
@@ -1459,18 +1462,16 @@ static int intel_pmu_init(void) | |||
1459 | 1462 | ||
1460 | static int amd_pmu_init(void) | 1463 | static int amd_pmu_init(void) |
1461 | { | 1464 | { |
1465 | /* Performance-monitoring supported from K7 and later: */ | ||
1466 | if (boot_cpu_data.x86 < 6) | ||
1467 | return -ENODEV; | ||
1468 | |||
1462 | x86_pmu = amd_pmu; | 1469 | x86_pmu = amd_pmu; |
1463 | 1470 | ||
1464 | switch (boot_cpu_data.x86) { | 1471 | /* Events are common for all AMDs */ |
1465 | case 0x0f: | 1472 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, |
1466 | case 0x10: | 1473 | sizeof(hw_cache_event_ids)); |
1467 | case 0x11: | ||
1468 | memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, | ||
1469 | sizeof(hw_cache_event_ids)); | ||
1470 | 1474 | ||
1471 | pr_cont("AMD Family 0f/10/11 events, "); | ||
1472 | break; | ||
1473 | } | ||
1474 | return 0; | 1475 | return 0; |
1475 | } | 1476 | } |
1476 | 1477 | ||
@@ -1554,9 +1555,9 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | |||
1554 | */ | 1555 | */ |
1555 | 1556 | ||
1556 | static inline | 1557 | static inline |
1557 | void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) | 1558 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) |
1558 | { | 1559 | { |
1559 | if (entry->nr < MAX_STACK_DEPTH) | 1560 | if (entry->nr < PERF_MAX_STACK_DEPTH) |
1560 | entry->ip[entry->nr++] = ip; | 1561 | entry->ip[entry->nr++] = ip; |
1561 | } | 1562 | } |
1562 | 1563 | ||
@@ -1577,8 +1578,8 @@ static void backtrace_warning(void *data, char *msg) | |||
1577 | 1578 | ||
1578 | static int backtrace_stack(void *data, char *name) | 1579 | static int backtrace_stack(void *data, char *name) |
1579 | { | 1580 | { |
1580 | /* Don't bother with IRQ stacks for now */ | 1581 | /* Process all stacks: */ |
1581 | return -1; | 1582 | return 0; |
1582 | } | 1583 | } |
1583 | 1584 | ||
1584 | static void backtrace_address(void *data, unsigned long addr, int reliable) | 1585 | static void backtrace_address(void *data, unsigned long addr, int reliable) |
@@ -1596,47 +1597,59 @@ static const struct stacktrace_ops backtrace_ops = { | |||
1596 | .address = backtrace_address, | 1597 | .address = backtrace_address, |
1597 | }; | 1598 | }; |
1598 | 1599 | ||
1600 | #include "../dumpstack.h" | ||
1601 | |||
1599 | static void | 1602 | static void |
1600 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1603 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) |
1601 | { | 1604 | { |
1602 | unsigned long bp; | 1605 | callchain_store(entry, PERF_CONTEXT_KERNEL); |
1603 | char *stack; | 1606 | callchain_store(entry, regs->ip); |
1604 | int nr = entry->nr; | ||
1605 | 1607 | ||
1606 | callchain_store(entry, instruction_pointer(regs)); | 1608 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); |
1609 | } | ||
1607 | 1610 | ||
1608 | stack = ((char *)regs + sizeof(struct pt_regs)); | 1611 | /* |
1609 | #ifdef CONFIG_FRAME_POINTER | 1612 | * best effort, GUP based copy_from_user() that assumes IRQ or NMI context |
1610 | bp = frame_pointer(regs); | 1613 | */ |
1611 | #else | 1614 | static unsigned long |
1612 | bp = 0; | 1615 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) |
1613 | #endif | 1616 | { |
1617 | unsigned long offset, addr = (unsigned long)from; | ||
1618 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1619 | unsigned long size, len = 0; | ||
1620 | struct page *page; | ||
1621 | void *map; | ||
1622 | int ret; | ||
1614 | 1623 | ||
1615 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); | 1624 | do { |
1625 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1626 | if (!ret) | ||
1627 | break; | ||
1616 | 1628 | ||
1617 | entry->kernel = entry->nr - nr; | 1629 | offset = addr & (PAGE_SIZE - 1); |
1618 | } | 1630 | size = min(PAGE_SIZE - offset, n - len); |
1619 | 1631 | ||
1632 | map = kmap_atomic(page, type); | ||
1633 | memcpy(to, map+offset, size); | ||
1634 | kunmap_atomic(map, type); | ||
1635 | put_page(page); | ||
1620 | 1636 | ||
1621 | struct stack_frame { | 1637 | len += size; |
1622 | const void __user *next_fp; | 1638 | to += size; |
1623 | unsigned long return_address; | 1639 | addr += size; |
1624 | }; | 1640 | |
1641 | } while (len < n); | ||
1642 | |||
1643 | return len; | ||
1644 | } | ||
1625 | 1645 | ||
1626 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1646 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) |
1627 | { | 1647 | { |
1628 | int ret; | 1648 | unsigned long bytes; |
1629 | 1649 | ||
1630 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | 1650 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); |
1631 | return 0; | ||
1632 | 1651 | ||
1633 | ret = 1; | 1652 | return bytes == sizeof(*frame); |
1634 | pagefault_disable(); | ||
1635 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
1636 | ret = 0; | ||
1637 | pagefault_enable(); | ||
1638 | |||
1639 | return ret; | ||
1640 | } | 1653 | } |
1641 | 1654 | ||
1642 | static void | 1655 | static void |
@@ -1644,28 +1657,28 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
1644 | { | 1657 | { |
1645 | struct stack_frame frame; | 1658 | struct stack_frame frame; |
1646 | const void __user *fp; | 1659 | const void __user *fp; |
1647 | int nr = entry->nr; | ||
1648 | 1660 | ||
1649 | regs = (struct pt_regs *)current->thread.sp0 - 1; | 1661 | if (!user_mode(regs)) |
1650 | fp = (void __user *)regs->bp; | 1662 | regs = task_pt_regs(current); |
1651 | 1663 | ||
1664 | fp = (void __user *)regs->bp; | ||
1665 | |||
1666 | callchain_store(entry, PERF_CONTEXT_USER); | ||
1652 | callchain_store(entry, regs->ip); | 1667 | callchain_store(entry, regs->ip); |
1653 | 1668 | ||
1654 | while (entry->nr < MAX_STACK_DEPTH) { | 1669 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1655 | frame.next_fp = NULL; | 1670 | frame.next_frame = NULL; |
1656 | frame.return_address = 0; | 1671 | frame.return_address = 0; |
1657 | 1672 | ||
1658 | if (!copy_stack_frame(fp, &frame)) | 1673 | if (!copy_stack_frame(fp, &frame)) |
1659 | break; | 1674 | break; |
1660 | 1675 | ||
1661 | if ((unsigned long)fp < user_stack_pointer(regs)) | 1676 | if ((unsigned long)fp < regs->sp) |
1662 | break; | 1677 | break; |
1663 | 1678 | ||
1664 | callchain_store(entry, frame.return_address); | 1679 | callchain_store(entry, frame.return_address); |
1665 | fp = frame.next_fp; | 1680 | fp = frame.next_frame; |
1666 | } | 1681 | } |
1667 | |||
1668 | entry->user = entry->nr - nr; | ||
1669 | } | 1682 | } |
1670 | 1683 | ||
1671 | static void | 1684 | static void |
@@ -1701,9 +1714,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1701 | entry = &__get_cpu_var(irq_entry); | 1714 | entry = &__get_cpu_var(irq_entry); |
1702 | 1715 | ||
1703 | entry->nr = 0; | 1716 | entry->nr = 0; |
1704 | entry->hv = 0; | ||
1705 | entry->kernel = 0; | ||
1706 | entry->user = 0; | ||
1707 | 1717 | ||
1708 | perf_do_callchain(regs, entry); | 1718 | perf_do_callchain(regs, entry); |
1709 | 1719 | ||
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index f97480941269..71da1bca13cb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | 14 | static inline pte_t gup_get_pte(pte_t *ptep) |
15 | { | 15 | { |
16 | #ifndef CONFIG_X86_PAE | 16 | #ifndef CONFIG_X86_PAE |
17 | return *ptep; | 17 | return ACCESS_ONCE(*ptep); |
18 | #else | 18 | #else |
19 | /* | 19 | /* |
20 | * With get_user_pages_fast, we walk down the pagetables without taking | 20 | * With get_user_pages_fast, we walk down the pagetables without taking |
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
219 | return 1; | 219 | return 1; |
220 | } | 220 | } |
221 | 221 | ||
222 | /* | ||
223 | * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall | ||
224 | * back to the regular GUP. | ||
225 | */ | ||
226 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
227 | struct page **pages) | ||
228 | { | ||
229 | struct mm_struct *mm = current->mm; | ||
230 | unsigned long addr, len, end; | ||
231 | unsigned long next; | ||
232 | unsigned long flags; | ||
233 | pgd_t *pgdp; | ||
234 | int nr = 0; | ||
235 | |||
236 | start &= PAGE_MASK; | ||
237 | addr = start; | ||
238 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
239 | end = start + len; | ||
240 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
241 | (void __user *)start, len))) | ||
242 | return 0; | ||
243 | |||
244 | /* | ||
244 | * XXX: batch / limit 'nr', to avoid large irq off latency; | ||
246 | * needs some instrumenting to determine the common sizes used by | ||
247 | * important workloads (eg. DB2), and whether limiting the batch size | ||
248 | * will decrease performance. | ||
249 | * | ||
250 | * It seems like we're in the clear for the moment. Direct-IO is | ||
251 | * the main guy that batches up lots of get_user_pages, and even | ||
252 | * they are limited to 64-at-a-time which is not so many. | ||
253 | */ | ||
254 | /* | ||
255 | * This doesn't prevent pagetable teardown, but does prevent | ||
256 | * the pagetables and pages from being freed on x86. | ||
257 | * | ||
258 | * So long as we atomically load page table pointers versus teardown | ||
259 | * (which we do on x86, with the above PAE exception), we can follow the | ||
260 | * address down to the page and take a ref on it. | ||
261 | */ | ||
262 | local_irq_save(flags); | ||
263 | pgdp = pgd_offset(mm, addr); | ||
264 | do { | ||
265 | pgd_t pgd = *pgdp; | ||
266 | |||
267 | next = pgd_addr_end(addr, end); | ||
268 | if (pgd_none(pgd)) | ||
269 | break; | ||
270 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
271 | break; | ||
272 | } while (pgdp++, addr = next, addr != end); | ||
273 | local_irq_restore(flags); | ||
274 | |||
275 | return nr; | ||
276 | } | ||
277 | |||
222 | /** | 278 | /** |
223 | * get_user_pages_fast() - pin user pages in memory | 279 | * get_user_pages_fast() - pin user pages in memory |
224 | * @start: starting user address | 280 | * @start: starting user address |
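Unlike get_user_pages_fast(), the new __get_user_pages_fast() never sleeps and never falls back to the regular GUP, so it is callable with IRQs off or from NMI context, and callers must tolerate a short (even zero) return. A minimal caller under those assumptions:

    #include <linux/mm.h>

    /* Hypothetical helper: pin one user page from atomic context. */
    static struct page *pin_user_page_atomic(unsigned long uaddr)
    {
            struct page *page;

            if (__get_user_pages_fast(uaddr, 1, 0, &page) != 1)
                    return NULL;    /* not present; no slow-path retry */

            return page;            /* caller drops it with put_page() */
    }

copy_from_user_nmi() in the perf_counter.c hunk above is exactly this pattern in a loop, with kmap_atomic() to reach the data.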
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1dd96d4406c0..47d4a01c5393 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -52,6 +52,19 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { | |||
52 | .d_delete = anon_inodefs_delete_dentry, | 52 | .d_delete = anon_inodefs_delete_dentry, |
53 | }; | 53 | }; |
54 | 54 | ||
55 | /* | ||
56 | * nop .set_page_dirty method so that people can use .page_mkwrite on | ||
57 | * anon inodes. | ||
58 | */ | ||
59 | static int anon_set_page_dirty(struct page *page) | ||
60 | { | ||
61 | return 0; | ||
62 | }; | ||
63 | |||
64 | static const struct address_space_operations anon_aops = { | ||
65 | .set_page_dirty = anon_set_page_dirty, | ||
66 | }; | ||
67 | |||
55 | /** | 68 | /** |
56 | * anon_inode_getfd - creates a new file instance by hooking it up to an | 69 | * anon_inode_getfd - creates a new file instance by hooking it up to an |
57 | * anonymous inode, and a dentry that describes the "class" | 70 | * anonymous inode, and a dentry that describes the "class" |
@@ -151,6 +164,8 @@ static struct inode *anon_inode_mkinode(void) | |||
151 | 164 | ||
152 | inode->i_fop = &anon_inode_fops; | 165 | inode->i_fop = &anon_inode_fops; |
153 | 166 | ||
167 | inode->i_mapping->a_ops = &anon_aops; | ||
168 | |||
154 | /* | 169 | /* |
155 | * Mark the inode dirty from the very beginning, | 170 | * Mark the inode dirty from the very beginning, |
156 | * that way it will never be moved to the dirty | 171 | * that way it will never be moved to the dirty |
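For context on why the nop method is needed: without an explicit .set_page_dirty, the generic set_page_dirty() falls back to __set_page_dirty_buffers(), which assumes block-device-style buffer-head pages that anon inodes do not have. A trimmed paraphrase of that fallback (mm/page-writeback.c of this era), shown only to motivate the change above:

int set_page_dirty(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	if (likely(mapping)) {
		int (*spd)(struct page *) = mapping->a_ops->set_page_dirty;

		if (!spd)
			spd = __set_page_dirty_buffers;	/* assumes buffer heads */
		return (*spd)(page);
	}
	/* ... mapping-less case elided ... */
}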
diff --git a/include/asm-generic/kmap_types.h b/include/asm-generic/kmap_types.h index 54e8b3d956b7..eddbce0f9fb9 100644 --- a/include/asm-generic/kmap_types.h +++ b/include/asm-generic/kmap_types.h | |||
@@ -24,7 +24,10 @@ D(12) KM_SOFTIRQ1, | |||
24 | D(13) KM_SYNC_ICACHE, | 24 | D(13) KM_SYNC_ICACHE, |
25 | D(14) KM_SYNC_DCACHE, | 25 | D(14) KM_SYNC_DCACHE, |
26 | D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ | 26 | D(15) KM_UML_USERCOPY, /* UML specific, for copy_*_user - used in do_op_one_page */ |
27 | D(16) KM_TYPE_NR | 27 | D(16) KM_IRQ_PTE, |
28 | D(17) KM_NMI, | ||
29 | D(18) KM_NMI_PTE, | ||
30 | D(19) KM_TYPE_NR | ||
28 | }; | 31 | }; |
29 | 32 | ||
30 | #undef D | 33 | #undef D |
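The three new slots give IRQ and NMI context their own atomic-kmap indices, so the 32-bit PAE gup path can map pte pages from an NMI without clobbering a slot that the interrupted code may be using. A hedged sketch of the intended usage pattern (kernel context; the helper name is illustrative, not from the patch):

/* Illustrative: map a PAE pte page from NMI context via the private slot. */
static pte_t gup_read_pte_nmi(struct page *pte_page, unsigned int idx)
{
	/* KM_NMI_PTE is reserved for NMI context, so this cannot collide
	 * with an atomic kmap in the interrupted IRQ or process context.
	 * (Reading the pte atomically on PAE is a separate concern -- see
	 * the ACCESS_ONCE()/gup_get_pte() change earlier in this commit.) */
	pte_t *ptep = kmap_atomic(pte_page, KM_NMI_PTE);
	pte_t pte = ptep[idx];

	kunmap_atomic(ptep, KM_NMI_PTE);
	return pte;
}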
diff --git a/include/linux/mm.h b/include/linux/mm.h index d88d6fc530ad..cf260d848eb9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -854,6 +854,12 @@ extern int mprotect_fixup(struct vm_area_struct *vma, | |||
854 | unsigned long end, unsigned long newflags); | 854 | unsigned long end, unsigned long newflags); |
855 | 855 | ||
856 | /* | 856 | /* |
857 | * Doesn't attempt to fault pages in; may return fewer pages than requested. | ||
858 | */ | ||
859 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
860 | struct page **pages); | ||
861 | |||
862 | /* | ||
857 | * A callback you can register to apply pressure to ageable caches. | 863 | * A callback you can register to apply pressure to ageable caches. |
858 | * | 864 | * |
859 | * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should | 865 | * 'shrink' is passed a count 'nr_to_scan' and a 'gfpmask'. It should |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 1b3118a1023a..89698d8aba5c 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -236,10 +236,16 @@ struct perf_counter_mmap_page { | |||
236 | /* | 236 | /* |
237 | * Control data for the mmap() data buffer. | 237 | * Control data for the mmap() data buffer. |
238 | * | 238 | * |
239 | * User-space reading this value should issue an rmb(), on SMP capable | 239 | * User-space reading the @data_head value should issue an rmb(), on |
240 | * platforms, after reading this value -- see perf_counter_wakeup(). | 240 | * SMP capable platforms, after reading this value -- see |
241 | * perf_counter_wakeup(). | ||
242 | * | ||
243 | * When the mapping is PROT_WRITE the @data_tail value should be | ||
244 | * written by userspace to reflect the last data read. In this case | ||
245 | * the kernel will not overwrite unread data. | ||
241 | */ | 246 | */ |
242 | __u64 data_head; /* head in the data section */ | 247 | __u64 data_head; /* head in the data section */ |
248 | __u64 data_tail; /* user-space written tail */ | ||
243 | }; | 249 | }; |
244 | 250 | ||
245 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | 251 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) |
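With @data_tail the buffer becomes a real producer/consumer ring when mapped PROT_WRITE. A hedged user-space sketch of the consumer side this enables, assuming the structures from this header; process_record() is hypothetical, and records that wrap the ring end would need to be copied out first (elided here):

extern void process_record(struct perf_event_header *hdr);	/* hypothetical */

/* Illustrative consumer loop for a PROT_WRITE mapping of the buffer.
 * pg = first mapped page, ring = the 2^n data pages after it,
 * size = their total length in bytes. */
void drain_ring(volatile struct perf_counter_mmap_page *pg,
		unsigned char *ring, unsigned long size)
{
	unsigned long tail = pg->data_tail;
	unsigned long head = pg->data_head;

	__sync_synchronize();		/* the rmb() the comment above asks for */

	while (tail != head) {
		struct perf_event_header *hdr;

		hdr = (void *)(ring + (tail & (size - 1)));
		process_record(hdr);
		tail += hdr->size;
	}

	__sync_synchronize();		/* finish reading before freeing the space */
	pg->data_tail = tail;		/* kernel may now write up to here */
}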
@@ -275,6 +281,15 @@ enum perf_event_type { | |||
275 | 281 | ||
276 | /* | 282 | /* |
277 | * struct { | 283 | * struct { |
284 | * struct perf_event_header header; | ||
285 | * u64 id; | ||
286 | * u64 lost; | ||
287 | * }; | ||
288 | */ | ||
289 | PERF_EVENT_LOST = 2, | ||
290 | |||
291 | /* | ||
292 | * struct { | ||
278 | * struct perf_event_header header; | 293 | * struct perf_event_header header; |
279 | * | 294 | * |
280 | * u32 pid, tid; | 295 | * u32 pid, tid; |
@@ -313,30 +328,39 @@ enum perf_event_type { | |||
313 | 328 | ||
314 | /* | 329 | /* |
315 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | 330 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field |
316 | * will be PERF_RECORD_* | 331 | * will be PERF_SAMPLE_* |
317 | * | 332 | * |
318 | * struct { | 333 | * struct { |
319 | * struct perf_event_header header; | 334 | * struct perf_event_header header; |
320 | * | 335 | * |
321 | * { u64 ip; } && PERF_RECORD_IP | 336 | * { u64 ip; } && PERF_SAMPLE_IP |
322 | * { u32 pid, tid; } && PERF_RECORD_TID | 337 | * { u32 pid, tid; } && PERF_SAMPLE_TID |
323 | * { u64 time; } && PERF_RECORD_TIME | 338 | * { u64 time; } && PERF_SAMPLE_TIME |
324 | * { u64 addr; } && PERF_RECORD_ADDR | 339 | * { u64 addr; } && PERF_SAMPLE_ADDR |
325 | * { u64 config; } && PERF_RECORD_CONFIG | 340 | * { u64 config; } && PERF_SAMPLE_CONFIG |
326 | * { u32 cpu, res; } && PERF_RECORD_CPU | 341 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
327 | * | 342 | * |
328 | * { u64 nr; | 343 | * { u64 nr; |
329 | * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP | 344 | * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP |
330 | * | 345 | * |
331 | * { u16 nr, | 346 | * { u64 nr, |
332 | * hv, | 347 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
333 | * kernel, | ||
334 | * user; | ||
335 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
336 | * }; | 348 | * }; |
337 | */ | 349 | */ |
338 | }; | 350 | }; |
339 | 351 | ||
352 | enum perf_callchain_context { | ||
353 | PERF_CONTEXT_HV = (__u64)-32, | ||
354 | PERF_CONTEXT_KERNEL = (__u64)-128, | ||
355 | PERF_CONTEXT_USER = (__u64)-512, | ||
356 | |||
357 | PERF_CONTEXT_GUEST = (__u64)-2048, | ||
358 | PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, | ||
359 | PERF_CONTEXT_GUEST_USER = (__u64)-2560, | ||
360 | |||
361 | PERF_CONTEXT_MAX = (__u64)-4095, | ||
362 | }; | ||
363 | |||
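These markers replace the old u16 hv/kernel/user counters: they are interleaved in-band with the sampled IPs, and any ip[] entry at or above PERF_CONTEXT_MAX (as an unsigned value) switches the context of the frames that follow. A hedged user-space decode sketch, assuming the definitions above and <linux/types.h>:

#include <stdio.h>
#include <linux/types.h>

/* Illustrative callchain decode: markers and real IPs share one array. */
void walk_callchain(const __u64 *ip, __u64 nr)
{
	__u64 context = 0;			/* no marker seen yet */
	__u64 i;

	for (i = 0; i < nr; i++) {
		if (ip[i] >= PERF_CONTEXT_MAX) {	/* a marker, not an IP */
			context = ip[i];
			continue;
		}
		printf("%s %#llx\n",
		       context == PERF_CONTEXT_KERNEL ? "  [k]" : "  [.]",
		       (unsigned long long)ip[i]);
	}
}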
340 | #ifdef __KERNEL__ | 364 | #ifdef __KERNEL__ |
341 | /* | 365 | /* |
342 | * Kernel-internal data types and definitions: | 366 | * Kernel-internal data types and definitions: |
@@ -356,6 +380,13 @@ enum perf_event_type { | |||
356 | #include <linux/pid_namespace.h> | 380 | #include <linux/pid_namespace.h> |
357 | #include <asm/atomic.h> | 381 | #include <asm/atomic.h> |
358 | 382 | ||
383 | #define PERF_MAX_STACK_DEPTH 255 | ||
384 | |||
385 | struct perf_callchain_entry { | ||
386 | __u64 nr; | ||
387 | __u64 ip[PERF_MAX_STACK_DEPTH]; | ||
388 | }; | ||
389 | |||
359 | struct task_struct; | 390 | struct task_struct; |
360 | 391 | ||
361 | /** | 392 | /** |
@@ -414,6 +445,7 @@ struct file; | |||
414 | struct perf_mmap_data { | 445 | struct perf_mmap_data { |
415 | struct rcu_head rcu_head; | 446 | struct rcu_head rcu_head; |
416 | int nr_pages; /* nr of data pages */ | 447 | int nr_pages; /* nr of data pages */ |
448 | int writable; /* are we writable */ | ||
417 | int nr_locked; /* nr pages mlocked */ | 449 | int nr_locked; /* nr pages mlocked */ |
418 | 450 | ||
419 | atomic_t poll; /* POLL_ for wakeups */ | 451 | atomic_t poll; /* POLL_ for wakeups */ |
@@ -423,8 +455,8 @@ struct perf_mmap_data { | |||
423 | atomic_long_t done_head; /* completed head */ | 455 | atomic_long_t done_head; /* completed head */ |
424 | 456 | ||
425 | atomic_t lock; /* concurrent writes */ | 457 | atomic_t lock; /* concurrent writes */ |
426 | |||
427 | atomic_t wakeup; /* needs a wakeup */ | 458 | atomic_t wakeup; /* needs a wakeup */ |
459 | atomic_t lost; /* nr records lost */ | ||
428 | 460 | ||
429 | struct perf_counter_mmap_page *user_page; | 461 | struct perf_counter_mmap_page *user_page; |
430 | void *data_pages[0]; | 462 | void *data_pages[0]; |
@@ -604,6 +636,7 @@ extern void perf_counter_task_tick(struct task_struct *task, int cpu); | |||
604 | extern int perf_counter_init_task(struct task_struct *child); | 636 | extern int perf_counter_init_task(struct task_struct *child); |
605 | extern void perf_counter_exit_task(struct task_struct *child); | 637 | extern void perf_counter_exit_task(struct task_struct *child); |
606 | extern void perf_counter_free_task(struct task_struct *task); | 638 | extern void perf_counter_free_task(struct task_struct *task); |
639 | extern void set_perf_counter_pending(void); | ||
607 | extern void perf_counter_do_pending(void); | 640 | extern void perf_counter_do_pending(void); |
608 | extern void perf_counter_print_debug(void); | 641 | extern void perf_counter_print_debug(void); |
609 | extern void __perf_disable(void); | 642 | extern void __perf_disable(void); |
@@ -649,18 +682,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) | |||
649 | extern void perf_counter_comm(struct task_struct *tsk); | 682 | extern void perf_counter_comm(struct task_struct *tsk); |
650 | extern void perf_counter_fork(struct task_struct *tsk); | 683 | extern void perf_counter_fork(struct task_struct *tsk); |
651 | 684 | ||
652 | extern void perf_counter_task_migration(struct task_struct *task, int cpu); | ||
653 | |||
654 | #define MAX_STACK_DEPTH 255 | ||
655 | |||
656 | struct perf_callchain_entry { | ||
657 | u16 nr; | ||
658 | u16 hv; | ||
659 | u16 kernel; | ||
660 | u16 user; | ||
661 | u64 ip[MAX_STACK_DEPTH]; | ||
662 | }; | ||
663 | |||
664 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | 685 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); |
665 | 686 | ||
666 | extern int sysctl_perf_counter_paranoid; | 687 | extern int sysctl_perf_counter_paranoid; |
@@ -701,8 +722,6 @@ static inline void perf_counter_mmap(struct vm_area_struct *vma) { } | |||
701 | static inline void perf_counter_comm(struct task_struct *tsk) { } | 722 | static inline void perf_counter_comm(struct task_struct *tsk) { } |
702 | static inline void perf_counter_fork(struct task_struct *tsk) { } | 723 | static inline void perf_counter_fork(struct task_struct *tsk) { } |
703 | static inline void perf_counter_init(void) { } | 724 | static inline void perf_counter_init(void) { } |
704 | static inline void perf_counter_task_migration(struct task_struct *task, | ||
705 | int cpu) { } | ||
706 | #endif | 725 | #endif |
707 | 726 | ||
708 | #endif /* __KERNEL__ */ | 727 | #endif /* __KERNEL__ */ |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 29b685f551aa..1a933a221ea4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c | |||
@@ -124,7 +124,7 @@ void perf_enable(void) | |||
124 | 124 | ||
125 | static void get_ctx(struct perf_counter_context *ctx) | 125 | static void get_ctx(struct perf_counter_context *ctx) |
126 | { | 126 | { |
127 | atomic_inc(&ctx->refcount); | 127 | WARN_ON(!atomic_inc_not_zero(&ctx->refcount)); |
128 | } | 128 | } |
129 | 129 | ||
130 | static void free_ctx(struct rcu_head *head) | 130 | static void free_ctx(struct rcu_head *head) |
@@ -175,6 +175,11 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) | |||
175 | spin_unlock_irqrestore(&ctx->lock, *flags); | 175 | spin_unlock_irqrestore(&ctx->lock, *flags); |
176 | goto retry; | 176 | goto retry; |
177 | } | 177 | } |
178 | |||
179 | if (!atomic_inc_not_zero(&ctx->refcount)) { | ||
180 | spin_unlock_irqrestore(&ctx->lock, *flags); | ||
181 | ctx = NULL; | ||
182 | } | ||
178 | } | 183 | } |
179 | rcu_read_unlock(); | 184 | rcu_read_unlock(); |
180 | return ctx; | 185 | return ctx; |
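This closes the window where the last reference to a context could be dropped between the RCU lookup and taking the lock: atomic_inc_not_zero() only succeeds while the refcount is provably non-zero, so a dying context is skipped rather than resurrected. The general RCU-lookup idiom being applied, as a minimal sketch with generic names (obj, global_slot) that are not from this file:

/* Generic sketch of the RCU + refcount lookup idiom used above. */
struct obj *lookup_obj(void)
{
	struct obj *obj;

	rcu_read_lock();
	obj = rcu_dereference(global_slot);
	if (obj && !atomic_inc_not_zero(&obj->refcount))
		obj = NULL;		/* caught it mid-teardown: skip it */
	rcu_read_unlock();

	return obj;			/* non-NULL => caller owns a reference */
}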
@@ -193,7 +198,6 @@ static struct perf_counter_context *perf_pin_task_context(struct task_struct *ta | |||
193 | ctx = perf_lock_task_context(task, &flags); | 198 | ctx = perf_lock_task_context(task, &flags); |
194 | if (ctx) { | 199 | if (ctx) { |
195 | ++ctx->pin_count; | 200 | ++ctx->pin_count; |
196 | get_ctx(ctx); | ||
197 | spin_unlock_irqrestore(&ctx->lock, flags); | 201 | spin_unlock_irqrestore(&ctx->lock, flags); |
198 | } | 202 | } |
199 | return ctx; | 203 | return ctx; |
@@ -1283,7 +1287,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) | |||
1283 | if (!interrupts) { | 1287 | if (!interrupts) { |
1284 | perf_disable(); | 1288 | perf_disable(); |
1285 | counter->pmu->disable(counter); | 1289 | counter->pmu->disable(counter); |
1286 | atomic_set(&hwc->period_left, 0); | 1290 | atomic64_set(&hwc->period_left, 0); |
1287 | counter->pmu->enable(counter); | 1291 | counter->pmu->enable(counter); |
1288 | perf_enable(); | 1292 | perf_enable(); |
1289 | } | 1293 | } |
@@ -1459,11 +1463,6 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu) | |||
1459 | put_ctx(parent_ctx); | 1463 | put_ctx(parent_ctx); |
1460 | ctx->parent_ctx = NULL; /* no longer a clone */ | 1464 | ctx->parent_ctx = NULL; /* no longer a clone */ |
1461 | } | 1465 | } |
1462 | /* | ||
1463 | * Get an extra reference before dropping the lock so that | ||
1464 | * this context won't get freed if the task exits. | ||
1465 | */ | ||
1466 | get_ctx(ctx); | ||
1467 | spin_unlock_irqrestore(&ctx->lock, flags); | 1466 | spin_unlock_irqrestore(&ctx->lock, flags); |
1468 | } | 1467 | } |
1469 | 1468 | ||
@@ -1553,7 +1552,7 @@ static int perf_release(struct inode *inode, struct file *file) | |||
1553 | static ssize_t | 1552 | static ssize_t |
1554 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | 1553 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) |
1555 | { | 1554 | { |
1556 | u64 values[3]; | 1555 | u64 values[4]; |
1557 | int n; | 1556 | int n; |
1558 | 1557 | ||
1559 | /* | 1558 | /* |
@@ -1620,22 +1619,6 @@ static void perf_counter_reset(struct perf_counter *counter) | |||
1620 | perf_counter_update_userpage(counter); | 1619 | perf_counter_update_userpage(counter); |
1621 | } | 1620 | } |
1622 | 1621 | ||
1623 | static void perf_counter_for_each_sibling(struct perf_counter *counter, | ||
1624 | void (*func)(struct perf_counter *)) | ||
1625 | { | ||
1626 | struct perf_counter_context *ctx = counter->ctx; | ||
1627 | struct perf_counter *sibling; | ||
1628 | |||
1629 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1630 | mutex_lock(&ctx->mutex); | ||
1631 | counter = counter->group_leader; | ||
1632 | |||
1633 | func(counter); | ||
1634 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1635 | func(sibling); | ||
1636 | mutex_unlock(&ctx->mutex); | ||
1637 | } | ||
1638 | |||
1639 | /* | 1622 | /* |
1640 | * Holding the top-level counter's child_mutex means that any | 1623 | * Holding the top-level counter's child_mutex means that any |
1641 | * descendant process that has inherited this counter will block | 1624 | * descendant process that has inherited this counter will block |
@@ -1658,14 +1641,18 @@ static void perf_counter_for_each_child(struct perf_counter *counter, | |||
1658 | static void perf_counter_for_each(struct perf_counter *counter, | 1641 | static void perf_counter_for_each(struct perf_counter *counter, |
1659 | void (*func)(struct perf_counter *)) | 1642 | void (*func)(struct perf_counter *)) |
1660 | { | 1643 | { |
1661 | struct perf_counter *child; | 1644 | struct perf_counter_context *ctx = counter->ctx; |
1645 | struct perf_counter *sibling; | ||
1662 | 1646 | ||
1663 | WARN_ON_ONCE(counter->ctx->parent_ctx); | 1647 | WARN_ON_ONCE(ctx->parent_ctx); |
1664 | mutex_lock(&counter->child_mutex); | 1648 | mutex_lock(&ctx->mutex); |
1665 | perf_counter_for_each_sibling(counter, func); | 1649 | counter = counter->group_leader; |
1666 | list_for_each_entry(child, &counter->child_list, child_list) | 1650 | |
1667 | perf_counter_for_each_sibling(child, func); | 1651 | perf_counter_for_each_child(counter, func); |
1668 | mutex_unlock(&counter->child_mutex); | 1652 | func(counter); |
1653 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1654 | perf_counter_for_each_child(sibling, func); | ||
1655 | mutex_unlock(&ctx->mutex); | ||
1669 | } | 1656 | } |
1670 | 1657 | ||
1671 | static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) | 1658 | static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) |
@@ -1806,6 +1793,12 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1806 | struct perf_mmap_data *data; | 1793 | struct perf_mmap_data *data; |
1807 | int ret = VM_FAULT_SIGBUS; | 1794 | int ret = VM_FAULT_SIGBUS; |
1808 | 1795 | ||
1796 | if (vmf->flags & FAULT_FLAG_MKWRITE) { | ||
1797 | if (vmf->pgoff == 0) | ||
1798 | ret = 0; | ||
1799 | return ret; | ||
1800 | } | ||
1801 | |||
1809 | rcu_read_lock(); | 1802 | rcu_read_lock(); |
1810 | data = rcu_dereference(counter->data); | 1803 | data = rcu_dereference(counter->data); |
1811 | if (!data) | 1804 | if (!data) |
@@ -1819,9 +1812,16 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1819 | if ((unsigned)nr > data->nr_pages) | 1812 | if ((unsigned)nr > data->nr_pages) |
1820 | goto unlock; | 1813 | goto unlock; |
1821 | 1814 | ||
1815 | if (vmf->flags & FAULT_FLAG_WRITE) | ||
1816 | goto unlock; | ||
1817 | |||
1822 | vmf->page = virt_to_page(data->data_pages[nr]); | 1818 | vmf->page = virt_to_page(data->data_pages[nr]); |
1823 | } | 1819 | } |
1820 | |||
1824 | get_page(vmf->page); | 1821 | get_page(vmf->page); |
1822 | vmf->page->mapping = vma->vm_file->f_mapping; | ||
1823 | vmf->page->index = vmf->pgoff; | ||
1824 | |||
1825 | ret = 0; | 1825 | ret = 0; |
1826 | unlock: | 1826 | unlock: |
1827 | rcu_read_unlock(); | 1827 | rcu_read_unlock(); |
@@ -1874,6 +1874,14 @@ fail: | |||
1874 | return -ENOMEM; | 1874 | return -ENOMEM; |
1875 | } | 1875 | } |
1876 | 1876 | ||
1877 | static void perf_mmap_free_page(unsigned long addr) | ||
1878 | { | ||
1879 | struct page *page = virt_to_page(addr); | ||
1880 | |||
1881 | page->mapping = NULL; | ||
1882 | __free_page(page); | ||
1883 | } | ||
1884 | |||
1877 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | 1885 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) |
1878 | { | 1886 | { |
1879 | struct perf_mmap_data *data; | 1887 | struct perf_mmap_data *data; |
@@ -1881,9 +1889,10 @@ static void __perf_mmap_data_free(struct rcu_head *rcu_head) | |||
1881 | 1889 | ||
1882 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | 1890 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); |
1883 | 1891 | ||
1884 | free_page((unsigned long)data->user_page); | 1892 | perf_mmap_free_page((unsigned long)data->user_page); |
1885 | for (i = 0; i < data->nr_pages; i++) | 1893 | for (i = 0; i < data->nr_pages; i++) |
1886 | free_page((unsigned long)data->data_pages[i]); | 1894 | perf_mmap_free_page((unsigned long)data->data_pages[i]); |
1895 | |||
1887 | kfree(data); | 1896 | kfree(data); |
1888 | } | 1897 | } |
1889 | 1898 | ||
@@ -1920,9 +1929,10 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
1920 | } | 1929 | } |
1921 | 1930 | ||
1922 | static struct vm_operations_struct perf_mmap_vmops = { | 1931 | static struct vm_operations_struct perf_mmap_vmops = { |
1923 | .open = perf_mmap_open, | 1932 | .open = perf_mmap_open, |
1924 | .close = perf_mmap_close, | 1933 | .close = perf_mmap_close, |
1925 | .fault = perf_mmap_fault, | 1934 | .fault = perf_mmap_fault, |
1935 | .page_mkwrite = perf_mmap_fault, | ||
1926 | }; | 1936 | }; |
1927 | 1937 | ||
1928 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) | 1938 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) |
@@ -1936,7 +1946,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
1936 | long user_extra, extra; | 1946 | long user_extra, extra; |
1937 | int ret = 0; | 1947 | int ret = 0; |
1938 | 1948 | ||
1939 | if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) | 1949 | if (!(vma->vm_flags & VM_SHARED)) |
1940 | return -EINVAL; | 1950 | return -EINVAL; |
1941 | 1951 | ||
1942 | vma_size = vma->vm_end - vma->vm_start; | 1952 | vma_size = vma->vm_end - vma->vm_start; |
@@ -1995,10 +2005,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) | |||
1995 | atomic_long_add(user_extra, &user->locked_vm); | 2005 | atomic_long_add(user_extra, &user->locked_vm); |
1996 | vma->vm_mm->locked_vm += extra; | 2006 | vma->vm_mm->locked_vm += extra; |
1997 | counter->data->nr_locked = extra; | 2007 | counter->data->nr_locked = extra; |
2008 | if (vma->vm_flags & VM_WRITE) | ||
2009 | counter->data->writable = 1; | ||
2010 | |||
1998 | unlock: | 2011 | unlock: |
1999 | mutex_unlock(&counter->mmap_mutex); | 2012 | mutex_unlock(&counter->mmap_mutex); |
2000 | 2013 | ||
2001 | vma->vm_flags &= ~VM_MAYWRITE; | ||
2002 | vma->vm_flags |= VM_RESERVED; | 2014 | vma->vm_flags |= VM_RESERVED; |
2003 | vma->vm_ops = &perf_mmap_vmops; | 2015 | vma->vm_ops = &perf_mmap_vmops; |
2004 | 2016 | ||
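On the user side, the relaxed VM_WRITE check means the buffer may now be mapped writable, and doing so is what sets data->writable and opts in to the non-overwrite mode. A hedged sketch, assuming the fd comes from the perf counter syscall and the mapping is one header page plus 2^n data pages:

#include <sys/mman.h>

/* Illustrative: map the counter buffer writable to enable the
 * non-overwrite (data_tail-honouring) mode. */
void *map_ring(int fd, size_t data_pages, size_t page_size)
{
	size_t len = (1 + data_pages) * page_size;	/* header + 2^n pages */

	/* MAP_SHARED is mandatory; PROT_WRITE selects non-overwrite mode. */
	return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
}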
@@ -2175,11 +2187,38 @@ struct perf_output_handle { | |||
2175 | unsigned long head; | 2187 | unsigned long head; |
2176 | unsigned long offset; | 2188 | unsigned long offset; |
2177 | int nmi; | 2189 | int nmi; |
2178 | int overflow; | 2190 | int sample; |
2179 | int locked; | 2191 | int locked; |
2180 | unsigned long flags; | 2192 | unsigned long flags; |
2181 | }; | 2193 | }; |
2182 | 2194 | ||
2195 | static bool perf_output_space(struct perf_mmap_data *data, | ||
2196 | unsigned int offset, unsigned int head) | ||
2197 | { | ||
2198 | unsigned long tail; | ||
2199 | unsigned long mask; | ||
2200 | |||
2201 | if (!data->writable) | ||
2202 | return true; | ||
2203 | |||
2204 | mask = (data->nr_pages << PAGE_SHIFT) - 1; | ||
2205 | /* | ||
2206 | * Userspace could choose to issue an mb() before updating the tail | ||
2207 | * pointer, so that all reads are completed before the write is | ||
2208 | * issued. | ||
2209 | */ | ||
2210 | tail = ACCESS_ONCE(data->user_page->data_tail); | ||
2211 | smp_rmb(); | ||
2212 | |||
2213 | offset = (offset - tail) & mask; | ||
2214 | head = (head - tail) & mask; | ||
2215 | |||
2216 | if ((int)(head - offset) < 0) | ||
2217 | return false; | ||
2218 | |||
2219 | return true; | ||
2220 | } | ||
2221 | |||
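The subtract-then-mask step normalizes offset and head relative to tail, so a single signed comparison detects a reservation that would run past the reader. A standalone check of the same arithmetic (user-space, assumed 4-page ring; only the writable case is modelled), not part of the patch:

#include <assert.h>

static int output_space(unsigned int tail, unsigned int offset,
			unsigned int head, unsigned int mask)
{
	offset = (offset - tail) & mask;
	head   = (head   - tail) & mask;
	return (int)(head - offset) >= 0;
}

int main(void)
{
	unsigned int mask = (4 * 4096) - 1;		/* 4-page ring */

	assert(output_space(0, 100, 200, mask));	/* plenty of room */
	assert(!output_space(50, 40, 60, mask));	/* [40,60) crosses tail=50 */
	return 0;
}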
2183 | static void perf_output_wakeup(struct perf_output_handle *handle) | 2222 | static void perf_output_wakeup(struct perf_output_handle *handle) |
2184 | { | 2223 | { |
2185 | atomic_set(&handle->data->poll, POLL_IN); | 2224 | atomic_set(&handle->data->poll, POLL_IN); |
@@ -2270,12 +2309,57 @@ out: | |||
2270 | local_irq_restore(handle->flags); | 2309 | local_irq_restore(handle->flags); |
2271 | } | 2310 | } |
2272 | 2311 | ||
2312 | static void perf_output_copy(struct perf_output_handle *handle, | ||
2313 | const void *buf, unsigned int len) | ||
2314 | { | ||
2315 | unsigned int pages_mask; | ||
2316 | unsigned int offset; | ||
2317 | unsigned int size; | ||
2318 | void **pages; | ||
2319 | |||
2320 | offset = handle->offset; | ||
2321 | pages_mask = handle->data->nr_pages - 1; | ||
2322 | pages = handle->data->data_pages; | ||
2323 | |||
2324 | do { | ||
2325 | unsigned int page_offset; | ||
2326 | int nr; | ||
2327 | |||
2328 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
2329 | page_offset = offset & (PAGE_SIZE - 1); | ||
2330 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
2331 | |||
2332 | memcpy(pages[nr] + page_offset, buf, size); | ||
2333 | |||
2334 | len -= size; | ||
2335 | buf += size; | ||
2336 | offset += size; | ||
2337 | } while (len); | ||
2338 | |||
2339 | handle->offset = offset; | ||
2340 | |||
2341 | /* | ||
2342 | * Check we didn't copy past our reservation window, taking the | ||
2343 | * possible unsigned int wrap into account. | ||
2344 | */ | ||
2345 | WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); | ||
2346 | } | ||
2347 | |||
2348 | #define perf_output_put(handle, x) \ | ||
2349 | perf_output_copy((handle), &(x), sizeof(x)) | ||
2350 | |||
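perf_output_copy() splits a write that straddles a page boundary into per-page memcpy() calls, indexing the page array modulo the (power-of-two) page count. A standalone miniature of the same loop, with assumed PG/NPAGES constants, to show the split:

#include <assert.h>
#include <string.h>

#define PG	4096u
#define NPAGES	4u				/* must be a power of two */

static char pages[NPAGES][PG];

static void ring_copy(unsigned int offset, const void *buf, unsigned int len)
{
	while (len) {
		unsigned int nr   = (offset / PG) & (NPAGES - 1);
		unsigned int off  = offset % PG;
		unsigned int size = PG - off < len ? PG - off : len;

		memcpy(&pages[nr][off], buf, size);
		buf     = (const char *)buf + size;
		offset += size;
		len    -= size;
	}
}

int main(void)
{
	char msg[100];
	unsigned int i;

	for (i = 0; i < sizeof(msg); i++)
		msg[i] = 'a' + i % 26;

	ring_copy(PG - 40, msg, sizeof(msg));	/* 40 bytes in page 0, 60 in page 1 */
	assert(pages[0][PG - 1] == msg[39]);	/* last byte of page 0 */
	assert(pages[1][0]      == msg[40]);	/* continues at top of page 1 */
	return 0;
}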
2273 | static int perf_output_begin(struct perf_output_handle *handle, | 2351 | static int perf_output_begin(struct perf_output_handle *handle, |
2274 | struct perf_counter *counter, unsigned int size, | 2352 | struct perf_counter *counter, unsigned int size, |
2275 | int nmi, int overflow) | 2353 | int nmi, int sample) |
2276 | { | 2354 | { |
2277 | struct perf_mmap_data *data; | 2355 | struct perf_mmap_data *data; |
2278 | unsigned int offset, head; | 2356 | unsigned int offset, head; |
2357 | int have_lost; | ||
2358 | struct { | ||
2359 | struct perf_event_header header; | ||
2360 | u64 id; | ||
2361 | u64 lost; | ||
2362 | } lost_event; | ||
2279 | 2363 | ||
2280 | /* | 2364 | /* |
2281 | * For inherited counters we send all the output towards the parent. | 2365 | * For inherited counters we send all the output towards the parent. |
@@ -2288,19 +2372,25 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2288 | if (!data) | 2372 | if (!data) |
2289 | goto out; | 2373 | goto out; |
2290 | 2374 | ||
2291 | handle->data = data; | 2375 | handle->data = data; |
2292 | handle->counter = counter; | 2376 | handle->counter = counter; |
2293 | handle->nmi = nmi; | 2377 | handle->nmi = nmi; |
2294 | handle->overflow = overflow; | 2378 | handle->sample = sample; |
2295 | 2379 | ||
2296 | if (!data->nr_pages) | 2380 | if (!data->nr_pages) |
2297 | goto fail; | 2381 | goto fail; |
2298 | 2382 | ||
2383 | have_lost = atomic_read(&data->lost); | ||
2384 | if (have_lost) | ||
2385 | size += sizeof(lost_event); | ||
2386 | |||
2299 | perf_output_lock(handle); | 2387 | perf_output_lock(handle); |
2300 | 2388 | ||
2301 | do { | 2389 | do { |
2302 | offset = head = atomic_long_read(&data->head); | 2390 | offset = head = atomic_long_read(&data->head); |
2303 | head += size; | 2391 | head += size; |
2392 | if (unlikely(!perf_output_space(data, offset, head))) | ||
2393 | goto fail; | ||
2304 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); | 2394 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); |
2305 | 2395 | ||
2306 | handle->offset = offset; | 2396 | handle->offset = offset; |
@@ -2309,55 +2399,27 @@ static int perf_output_begin(struct perf_output_handle *handle, | |||
2309 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) | 2399 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) |
2310 | atomic_set(&data->wakeup, 1); | 2400 | atomic_set(&data->wakeup, 1); |
2311 | 2401 | ||
2402 | if (have_lost) { | ||
2403 | lost_event.header.type = PERF_EVENT_LOST; | ||
2404 | lost_event.header.misc = 0; | ||
2405 | lost_event.header.size = sizeof(lost_event); | ||
2406 | lost_event.id = counter->id; | ||
2407 | lost_event.lost = atomic_xchg(&data->lost, 0); | ||
2408 | |||
2409 | perf_output_put(handle, lost_event); | ||
2410 | } | ||
2411 | |||
2312 | return 0; | 2412 | return 0; |
2313 | 2413 | ||
2314 | fail: | 2414 | fail: |
2315 | perf_output_wakeup(handle); | 2415 | atomic_inc(&data->lost); |
2416 | perf_output_unlock(handle); | ||
2316 | out: | 2417 | out: |
2317 | rcu_read_unlock(); | 2418 | rcu_read_unlock(); |
2318 | 2419 | ||
2319 | return -ENOSPC; | 2420 | return -ENOSPC; |
2320 | } | 2421 | } |
2321 | 2422 | ||
2322 | static void perf_output_copy(struct perf_output_handle *handle, | ||
2323 | const void *buf, unsigned int len) | ||
2324 | { | ||
2325 | unsigned int pages_mask; | ||
2326 | unsigned int offset; | ||
2327 | unsigned int size; | ||
2328 | void **pages; | ||
2329 | |||
2330 | offset = handle->offset; | ||
2331 | pages_mask = handle->data->nr_pages - 1; | ||
2332 | pages = handle->data->data_pages; | ||
2333 | |||
2334 | do { | ||
2335 | unsigned int page_offset; | ||
2336 | int nr; | ||
2337 | |||
2338 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
2339 | page_offset = offset & (PAGE_SIZE - 1); | ||
2340 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
2341 | |||
2342 | memcpy(pages[nr] + page_offset, buf, size); | ||
2343 | |||
2344 | len -= size; | ||
2345 | buf += size; | ||
2346 | offset += size; | ||
2347 | } while (len); | ||
2348 | |||
2349 | handle->offset = offset; | ||
2350 | |||
2351 | /* | ||
2352 | * Check we didn't copy past our reservation window, taking the | ||
2353 | * possible unsigned int wrap into account. | ||
2354 | */ | ||
2355 | WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); | ||
2356 | } | ||
2357 | |||
2358 | #define perf_output_put(handle, x) \ | ||
2359 | perf_output_copy((handle), &(x), sizeof(x)) | ||
2360 | |||
2361 | static void perf_output_end(struct perf_output_handle *handle) | 2423 | static void perf_output_end(struct perf_output_handle *handle) |
2362 | { | 2424 | { |
2363 | struct perf_counter *counter = handle->counter; | 2425 | struct perf_counter *counter = handle->counter; |
@@ -2365,7 +2427,7 @@ static void perf_output_end(struct perf_output_handle *handle) | |||
2365 | 2427 | ||
2366 | int wakeup_events = counter->attr.wakeup_events; | 2428 | int wakeup_events = counter->attr.wakeup_events; |
2367 | 2429 | ||
2368 | if (handle->overflow && wakeup_events) { | 2430 | if (handle->sample && wakeup_events) { |
2369 | int events = atomic_inc_return(&data->events); | 2431 | int events = atomic_inc_return(&data->events); |
2370 | if (events >= wakeup_events) { | 2432 | if (events >= wakeup_events) { |
2371 | atomic_sub(wakeup_events, &data->events); | 2433 | atomic_sub(wakeup_events, &data->events); |
@@ -2970,7 +3032,7 @@ static void perf_log_throttle(struct perf_counter *counter, int enable) | |||
2970 | } | 3032 | } |
2971 | 3033 | ||
2972 | /* | 3034 | /* |
2973 | * Generic counter overflow handling. | 3035 | * Generic counter overflow handling, sampling. |
2974 | */ | 3036 | */ |
2975 | 3037 | ||
2976 | int perf_counter_overflow(struct perf_counter *counter, int nmi, | 3038 | int perf_counter_overflow(struct perf_counter *counter, int nmi, |
@@ -3109,20 +3171,15 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | |||
3109 | } | 3171 | } |
3110 | 3172 | ||
3111 | static void perf_swcounter_overflow(struct perf_counter *counter, | 3173 | static void perf_swcounter_overflow(struct perf_counter *counter, |
3112 | int nmi, struct pt_regs *regs, u64 addr) | 3174 | int nmi, struct perf_sample_data *data) |
3113 | { | 3175 | { |
3114 | struct perf_sample_data data = { | 3176 | data->period = counter->hw.last_period; |
3115 | .regs = regs, | ||
3116 | .addr = addr, | ||
3117 | .period = counter->hw.last_period, | ||
3118 | }; | ||
3119 | 3177 | ||
3120 | perf_swcounter_update(counter); | 3178 | perf_swcounter_update(counter); |
3121 | perf_swcounter_set_period(counter); | 3179 | perf_swcounter_set_period(counter); |
3122 | if (perf_counter_overflow(counter, nmi, &data)) | 3180 | if (perf_counter_overflow(counter, nmi, data)) |
3123 | /* soft-disable the counter */ | 3181 | /* soft-disable the counter */ |
3124 | ; | 3182 | ; |
3125 | |||
3126 | } | 3183 | } |
3127 | 3184 | ||
3128 | static int perf_swcounter_is_counting(struct perf_counter *counter) | 3185 | static int perf_swcounter_is_counting(struct perf_counter *counter) |
@@ -3187,18 +3244,18 @@ static int perf_swcounter_match(struct perf_counter *counter, | |||
3187 | } | 3244 | } |
3188 | 3245 | ||
3189 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | 3246 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, |
3190 | int nmi, struct pt_regs *regs, u64 addr) | 3247 | int nmi, struct perf_sample_data *data) |
3191 | { | 3248 | { |
3192 | int neg = atomic64_add_negative(nr, &counter->hw.count); | 3249 | int neg = atomic64_add_negative(nr, &counter->hw.count); |
3193 | 3250 | ||
3194 | if (counter->hw.sample_period && !neg && regs) | 3251 | if (counter->hw.sample_period && !neg && data->regs) |
3195 | perf_swcounter_overflow(counter, nmi, regs, addr); | 3252 | perf_swcounter_overflow(counter, nmi, data); |
3196 | } | 3253 | } |
3197 | 3254 | ||
3198 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | 3255 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, |
3199 | enum perf_type_id type, u32 event, | 3256 | enum perf_type_id type, |
3200 | u64 nr, int nmi, struct pt_regs *regs, | 3257 | u32 event, u64 nr, int nmi, |
3201 | u64 addr) | 3258 | struct perf_sample_data *data) |
3202 | { | 3259 | { |
3203 | struct perf_counter *counter; | 3260 | struct perf_counter *counter; |
3204 | 3261 | ||
@@ -3207,8 +3264,8 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | |||
3207 | 3264 | ||
3208 | rcu_read_lock(); | 3265 | rcu_read_lock(); |
3209 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | 3266 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { |
3210 | if (perf_swcounter_match(counter, type, event, regs)) | 3267 | if (perf_swcounter_match(counter, type, event, data->regs)) |
3211 | perf_swcounter_add(counter, nr, nmi, regs, addr); | 3268 | perf_swcounter_add(counter, nr, nmi, data); |
3212 | } | 3269 | } |
3213 | rcu_read_unlock(); | 3270 | rcu_read_unlock(); |
3214 | } | 3271 | } |
@@ -3227,9 +3284,9 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) | |||
3227 | return &cpuctx->recursion[0]; | 3284 | return &cpuctx->recursion[0]; |
3228 | } | 3285 | } |
3229 | 3286 | ||
3230 | static void __perf_swcounter_event(enum perf_type_id type, u32 event, | 3287 | static void do_perf_swcounter_event(enum perf_type_id type, u32 event, |
3231 | u64 nr, int nmi, struct pt_regs *regs, | 3288 | u64 nr, int nmi, |
3232 | u64 addr) | 3289 | struct perf_sample_data *data) |
3233 | { | 3290 | { |
3234 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | 3291 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); |
3235 | int *recursion = perf_swcounter_recursion_context(cpuctx); | 3292 | int *recursion = perf_swcounter_recursion_context(cpuctx); |
@@ -3242,7 +3299,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, | |||
3242 | barrier(); | 3299 | barrier(); |
3243 | 3300 | ||
3244 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, | 3301 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, |
3245 | nr, nmi, regs, addr); | 3302 | nr, nmi, data); |
3246 | rcu_read_lock(); | 3303 | rcu_read_lock(); |
3247 | /* | 3304 | /* |
3248 | * doesn't really matter which of the child contexts the | 3305 | * doesn't really matter which of the child contexts the |
@@ -3250,7 +3307,7 @@ static void __perf_swcounter_event(enum perf_type_id type, u32 event, | |||
3250 | */ | 3307 | */ |
3251 | ctx = rcu_dereference(current->perf_counter_ctxp); | 3308 | ctx = rcu_dereference(current->perf_counter_ctxp); |
3252 | if (ctx) | 3309 | if (ctx) |
3253 | perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); | 3310 | perf_swcounter_ctx_event(ctx, type, event, nr, nmi, data); |
3254 | rcu_read_unlock(); | 3311 | rcu_read_unlock(); |
3255 | 3312 | ||
3256 | barrier(); | 3313 | barrier(); |
@@ -3263,7 +3320,12 @@ out: | |||
3263 | void | 3320 | void |
3264 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | 3321 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) |
3265 | { | 3322 | { |
3266 | __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); | 3323 | struct perf_sample_data data = { |
3324 | .regs = regs, | ||
3325 | .addr = addr, | ||
3326 | }; | ||
3327 | |||
3328 | do_perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, &data); | ||
3267 | } | 3329 | } |
3268 | 3330 | ||
3269 | static void perf_swcounter_read(struct perf_counter *counter) | 3331 | static void perf_swcounter_read(struct perf_counter *counter) |
@@ -3404,36 +3466,18 @@ static const struct pmu perf_ops_task_clock = { | |||
3404 | .read = task_clock_perf_counter_read, | 3466 | .read = task_clock_perf_counter_read, |
3405 | }; | 3467 | }; |
3406 | 3468 | ||
3407 | /* | ||
3408 | * Software counter: cpu migrations | ||
3409 | */ | ||
3410 | void perf_counter_task_migration(struct task_struct *task, int cpu) | ||
3411 | { | ||
3412 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3413 | struct perf_counter_context *ctx; | ||
3414 | |||
3415 | perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, | ||
3416 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3417 | 1, 1, NULL, 0); | ||
3418 | |||
3419 | ctx = perf_pin_task_context(task); | ||
3420 | if (ctx) { | ||
3421 | perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, | ||
3422 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3423 | 1, 1, NULL, 0); | ||
3424 | perf_unpin_context(ctx); | ||
3425 | } | ||
3426 | } | ||
3427 | |||
3428 | #ifdef CONFIG_EVENT_PROFILE | 3469 | #ifdef CONFIG_EVENT_PROFILE |
3429 | void perf_tpcounter_event(int event_id) | 3470 | void perf_tpcounter_event(int event_id) |
3430 | { | 3471 | { |
3431 | struct pt_regs *regs = get_irq_regs(); | 3472 | struct perf_sample_data data = { |
3473 | .regs = get_irq_regs(), | ||
3474 | .addr = 0, | ||
3475 | }; | ||
3432 | 3476 | ||
3433 | if (!regs) | 3477 | if (!data.regs) |
3434 | regs = task_pt_regs(current); | 3478 | data.regs = task_pt_regs(current); |
3435 | 3479 | ||
3436 | __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); | 3480 | do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); |
3437 | } | 3481 | } |
3438 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); | 3482 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); |
3439 | 3483 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 92e51287b980..7c9098d186e6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1978,7 +1978,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1978 | if (task_hot(p, old_rq->clock, NULL)) | 1978 | if (task_hot(p, old_rq->clock, NULL)) |
1979 | schedstat_inc(p, se.nr_forced2_migrations); | 1979 | schedstat_inc(p, se.nr_forced2_migrations); |
1980 | #endif | 1980 | #endif |
1981 | perf_counter_task_migration(p, new_cpu); | 1981 | perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
1982 | 1, 1, NULL, 0); | ||
1982 | } | 1983 | } |
1983 | p->se.vruntime -= old_cfsrq->min_vruntime - | 1984 | p->se.vruntime -= old_cfsrq->min_vruntime - |
1984 | new_cfsrq->min_vruntime; | 1985 | new_cfsrq->min_vruntime; |
diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0cbd5d6874ec..36d7eef49913 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile | |||
@@ -157,10 +157,15 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') | |||
157 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') | 157 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') |
158 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') | 158 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') |
159 | 159 | ||
160 | # If we're on a 64-bit kernel, use -m64 | ||
161 | ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) | ||
162 | M64 := -m64 | ||
163 | endif | ||
164 | |||
160 | # CFLAGS and LDFLAGS are for the users to override from the command line. | 165 | # CFLAGS and LDFLAGS are for the users to override from the command line. |
161 | 166 | ||
162 | CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 | 167 | CFLAGS = $(M64) -ggdb3 -Wall -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 |
163 | LDFLAGS = -lpthread -lrt -lelf | 168 | LDFLAGS = -lpthread -lrt -lelf -lm |
164 | ALL_CFLAGS = $(CFLAGS) | 169 | ALL_CFLAGS = $(CFLAGS) |
165 | ALL_LDFLAGS = $(LDFLAGS) | 170 | ALL_LDFLAGS = $(LDFLAGS) |
166 | STRIP ?= strip | 171 | STRIP ?= strip |
@@ -285,6 +290,7 @@ LIB_FILE=libperf.a | |||
285 | 290 | ||
286 | LIB_H += ../../include/linux/perf_counter.h | 291 | LIB_H += ../../include/linux/perf_counter.h |
287 | LIB_H += perf.h | 292 | LIB_H += perf.h |
293 | LIB_H += types.h | ||
288 | LIB_H += util/list.h | 294 | LIB_H += util/list.h |
289 | LIB_H += util/rbtree.h | 295 | LIB_H += util/rbtree.h |
290 | LIB_H += util/levenshtein.h | 296 | LIB_H += util/levenshtein.h |
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index b1ed5f766cb3..7e58e3ad1508 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -25,6 +25,10 @@ | |||
25 | #define SHOW_USER 2 | 25 | #define SHOW_USER 2 |
26 | #define SHOW_HV 4 | 26 | #define SHOW_HV 4 |
27 | 27 | ||
28 | #define MIN_GREEN 0.5 | ||
29 | #define MIN_RED 5.0 | ||
30 | |||
31 | |||
28 | static char const *input_name = "perf.data"; | 32 | static char const *input_name = "perf.data"; |
29 | static char *vmlinux = "vmlinux"; | 33 | static char *vmlinux = "vmlinux"; |
30 | 34 | ||
@@ -39,40 +43,42 @@ static int dump_trace = 0; | |||
39 | 43 | ||
40 | static int verbose; | 44 | static int verbose; |
41 | 45 | ||
46 | static int print_line; | ||
47 | |||
42 | static unsigned long page_size; | 48 | static unsigned long page_size; |
43 | static unsigned long mmap_window = 32; | 49 | static unsigned long mmap_window = 32; |
44 | 50 | ||
45 | struct ip_event { | 51 | struct ip_event { |
46 | struct perf_event_header header; | 52 | struct perf_event_header header; |
47 | __u64 ip; | 53 | u64 ip; |
48 | __u32 pid, tid; | 54 | u32 pid, tid; |
49 | }; | 55 | }; |
50 | 56 | ||
51 | struct mmap_event { | 57 | struct mmap_event { |
52 | struct perf_event_header header; | 58 | struct perf_event_header header; |
53 | __u32 pid, tid; | 59 | u32 pid, tid; |
54 | __u64 start; | 60 | u64 start; |
55 | __u64 len; | 61 | u64 len; |
56 | __u64 pgoff; | 62 | u64 pgoff; |
57 | char filename[PATH_MAX]; | 63 | char filename[PATH_MAX]; |
58 | }; | 64 | }; |
59 | 65 | ||
60 | struct comm_event { | 66 | struct comm_event { |
61 | struct perf_event_header header; | 67 | struct perf_event_header header; |
62 | __u32 pid, tid; | 68 | u32 pid, tid; |
63 | char comm[16]; | 69 | char comm[16]; |
64 | }; | 70 | }; |
65 | 71 | ||
66 | struct fork_event { | 72 | struct fork_event { |
67 | struct perf_event_header header; | 73 | struct perf_event_header header; |
68 | __u32 pid, ppid; | 74 | u32 pid, ppid; |
69 | }; | 75 | }; |
70 | 76 | ||
71 | struct period_event { | 77 | struct period_event { |
72 | struct perf_event_header header; | 78 | struct perf_event_header header; |
73 | __u64 time; | 79 | u64 time; |
74 | __u64 id; | 80 | u64 id; |
75 | __u64 sample_period; | 81 | u64 sample_period; |
76 | }; | 82 | }; |
77 | 83 | ||
78 | typedef union event_union { | 84 | typedef union event_union { |
@@ -84,6 +90,13 @@ typedef union event_union { | |||
84 | struct period_event period; | 90 | struct period_event period; |
85 | } event_t; | 91 | } event_t; |
86 | 92 | ||
93 | |||
94 | struct sym_ext { | ||
95 | struct rb_node node; | ||
96 | double percent; | ||
97 | char *path; | ||
98 | }; | ||
99 | |||
87 | static LIST_HEAD(dsos); | 100 | static LIST_HEAD(dsos); |
88 | static struct dso *kernel_dso; | 101 | static struct dso *kernel_dso; |
89 | static struct dso *vdso; | 102 | static struct dso *vdso; |
@@ -145,7 +158,7 @@ static void dsos__fprintf(FILE *fp) | |||
145 | dso__fprintf(pos, fp); | 158 | dso__fprintf(pos, fp); |
146 | } | 159 | } |
147 | 160 | ||
148 | static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) | 161 | static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) |
149 | { | 162 | { |
150 | return dso__find_symbol(kernel_dso, ip); | 163 | return dso__find_symbol(kernel_dso, ip); |
151 | } | 164 | } |
@@ -178,19 +191,19 @@ static int load_kernel(void) | |||
178 | 191 | ||
179 | struct map { | 192 | struct map { |
180 | struct list_head node; | 193 | struct list_head node; |
181 | __u64 start; | 194 | u64 start; |
182 | __u64 end; | 195 | u64 end; |
183 | __u64 pgoff; | 196 | u64 pgoff; |
184 | __u64 (*map_ip)(struct map *, __u64); | 197 | u64 (*map_ip)(struct map *, u64); |
185 | struct dso *dso; | 198 | struct dso *dso; |
186 | }; | 199 | }; |
187 | 200 | ||
188 | static __u64 map__map_ip(struct map *map, __u64 ip) | 201 | static u64 map__map_ip(struct map *map, u64 ip) |
189 | { | 202 | { |
190 | return ip - map->start + map->pgoff; | 203 | return ip - map->start + map->pgoff; |
191 | } | 204 | } |
192 | 205 | ||
193 | static __u64 vdso__map_ip(struct map *map, __u64 ip) | 206 | static u64 vdso__map_ip(struct map *map, u64 ip) |
194 | { | 207 | { |
195 | return ip; | 208 | return ip; |
196 | } | 209 | } |
@@ -373,7 +386,7 @@ static int thread__fork(struct thread *self, struct thread *parent) | |||
373 | return 0; | 386 | return 0; |
374 | } | 387 | } |
375 | 388 | ||
376 | static struct map *thread__find_map(struct thread *self, __u64 ip) | 389 | static struct map *thread__find_map(struct thread *self, u64 ip) |
377 | { | 390 | { |
378 | struct map *pos; | 391 | struct map *pos; |
379 | 392 | ||
@@ -414,7 +427,7 @@ struct hist_entry { | |||
414 | struct map *map; | 427 | struct map *map; |
415 | struct dso *dso; | 428 | struct dso *dso; |
416 | struct symbol *sym; | 429 | struct symbol *sym; |
417 | __u64 ip; | 430 | u64 ip; |
418 | char level; | 431 | char level; |
419 | 432 | ||
420 | uint32_t count; | 433 | uint32_t count; |
@@ -519,7 +532,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) | |||
519 | if (self->dso) | 532 | if (self->dso) |
520 | return fprintf(fp, "%-25s", self->dso->name); | 533 | return fprintf(fp, "%-25s", self->dso->name); |
521 | 534 | ||
522 | return fprintf(fp, "%016llx ", (__u64)self->ip); | 535 | return fprintf(fp, "%016llx ", (u64)self->ip); |
523 | } | 536 | } |
524 | 537 | ||
525 | static struct sort_entry sort_dso = { | 538 | static struct sort_entry sort_dso = { |
@@ -533,7 +546,7 @@ static struct sort_entry sort_dso = { | |||
533 | static int64_t | 546 | static int64_t |
534 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 547 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
535 | { | 548 | { |
536 | __u64 ip_l, ip_r; | 549 | u64 ip_l, ip_r; |
537 | 550 | ||
538 | if (left->sym == right->sym) | 551 | if (left->sym == right->sym) |
539 | return 0; | 552 | return 0; |
@@ -550,13 +563,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) | |||
550 | size_t ret = 0; | 563 | size_t ret = 0; |
551 | 564 | ||
552 | if (verbose) | 565 | if (verbose) |
553 | ret += fprintf(fp, "%#018llx ", (__u64)self->ip); | 566 | ret += fprintf(fp, "%#018llx ", (u64)self->ip); |
554 | 567 | ||
555 | if (self->sym) { | 568 | if (self->sym) { |
556 | ret += fprintf(fp, "[%c] %s", | 569 | ret += fprintf(fp, "[%c] %s", |
557 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); | 570 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); |
558 | } else { | 571 | } else { |
559 | ret += fprintf(fp, "%#016llx", (__u64)self->ip); | 572 | ret += fprintf(fp, "%#016llx", (u64)self->ip); |
560 | } | 573 | } |
561 | 574 | ||
562 | return ret; | 575 | return ret; |
@@ -647,7 +660,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | |||
647 | /* | 660 | /* |
648 | * collect histogram counts | 661 | * collect histogram counts |
649 | */ | 662 | */ |
650 | static void hist_hit(struct hist_entry *he, __u64 ip) | 663 | static void hist_hit(struct hist_entry *he, u64 ip) |
651 | { | 664 | { |
652 | unsigned int sym_size, offset; | 665 | unsigned int sym_size, offset; |
653 | struct symbol *sym = he->sym; | 666 | struct symbol *sym = he->sym; |
@@ -676,7 +689,7 @@ static void hist_hit(struct hist_entry *he, __u64 ip) | |||
676 | 689 | ||
677 | static int | 690 | static int |
678 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | 691 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, |
679 | struct symbol *sym, __u64 ip, char level) | 692 | struct symbol *sym, u64 ip, char level) |
680 | { | 693 | { |
681 | struct rb_node **p = &hist.rb_node; | 694 | struct rb_node **p = &hist.rb_node; |
682 | struct rb_node *parent = NULL; | 695 | struct rb_node *parent = NULL; |
@@ -848,7 +861,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
848 | int show = 0; | 861 | int show = 0; |
849 | struct dso *dso = NULL; | 862 | struct dso *dso = NULL; |
850 | struct thread *thread = threads__findnew(event->ip.pid); | 863 | struct thread *thread = threads__findnew(event->ip.pid); |
851 | __u64 ip = event->ip.ip; | 864 | u64 ip = event->ip.ip; |
852 | struct map *map = NULL; | 865 | struct map *map = NULL; |
853 | 866 | ||
854 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", | 867 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", |
@@ -1030,13 +1043,33 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
1030 | return 0; | 1043 | return 0; |
1031 | } | 1044 | } |
1032 | 1045 | ||
1046 | static char *get_color(double percent) | ||
1047 | { | ||
1048 | char *color = PERF_COLOR_NORMAL; | ||
1049 | |||
1050 | /* | ||
1051 | * We color high-overhead entries in red, mid-overhead | ||
1052 | * entries in green - and keep the low overhead places | ||
1053 | * normal: | ||
1054 | */ | ||
1055 | if (percent >= MIN_RED) | ||
1056 | color = PERF_COLOR_RED; | ||
1057 | else { | ||
1058 | if (percent > MIN_GREEN) | ||
1059 | color = PERF_COLOR_GREEN; | ||
1060 | } | ||
1061 | return color; | ||
1062 | } | ||
1063 | |||
1033 | static int | 1064 | static int |
1034 | parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) | 1065 | parse_line(FILE *file, struct symbol *sym, u64 start, u64 len) |
1035 | { | 1066 | { |
1036 | char *line = NULL, *tmp, *tmp2; | 1067 | char *line = NULL, *tmp, *tmp2; |
1068 | static const char *prev_line; | ||
1069 | static const char *prev_color; | ||
1037 | unsigned int offset; | 1070 | unsigned int offset; |
1038 | size_t line_len; | 1071 | size_t line_len; |
1039 | __u64 line_ip; | 1072 | u64 line_ip; |
1040 | int ret; | 1073 | int ret; |
1041 | char *c; | 1074 | char *c; |
1042 | 1075 | ||
@@ -1073,27 +1106,36 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) | |||
1073 | } | 1106 | } |
1074 | 1107 | ||
1075 | if (line_ip != -1) { | 1108 | if (line_ip != -1) { |
1109 | const char *path = NULL; | ||
1076 | unsigned int hits = 0; | 1110 | unsigned int hits = 0; |
1077 | double percent = 0.0; | 1111 | double percent = 0.0; |
1078 | char *color = PERF_COLOR_NORMAL; | 1112 | char *color; |
1113 | struct sym_ext *sym_ext = sym->priv; | ||
1079 | 1114 | ||
1080 | offset = line_ip - start; | 1115 | offset = line_ip - start; |
1081 | if (offset < len) | 1116 | if (offset < len) |
1082 | hits = sym->hist[offset]; | 1117 | hits = sym->hist[offset]; |
1083 | 1118 | ||
1084 | if (sym->hist_sum) | 1119 | if (offset < len && sym_ext) { |
1120 | path = sym_ext[offset].path; | ||
1121 | percent = sym_ext[offset].percent; | ||
1122 | } else if (sym->hist_sum) | ||
1085 | percent = 100.0 * hits / sym->hist_sum; | 1123 | percent = 100.0 * hits / sym->hist_sum; |
1086 | 1124 | ||
1125 | color = get_color(percent); | ||
1126 | |||
1087 | /* | 1127 | /* |
1088 | * We color high-overhead entries in red, mid-overhead | 1128 | * Also color the filename and line if needed, with |
1089 | * entries in green - and keep the low overhead places | 1129 | * the same color than the percentage. Don't print it |
1090 | * normal: | 1130 | * twice for close colored ip with the same filename:line |
1091 | */ | 1131 | */ |
1092 | if (percent >= 5.0) | 1132 | if (path) { |
1093 | color = PERF_COLOR_RED; | 1133 | if (!prev_line || strcmp(prev_line, path) |
1094 | else { | 1134 | || color != prev_color) { |
1095 | if (percent > 0.5) | 1135 | color_fprintf(stdout, color, " %s", path); |
1096 | color = PERF_COLOR_GREEN; | 1136 | prev_line = path; |
1137 | prev_color = color; | ||
1138 | } | ||
1097 | } | 1139 | } |
1098 | 1140 | ||
1099 | color_fprintf(stdout, color, " %7.2f", percent); | 1141 | color_fprintf(stdout, color, " %7.2f", percent); |
@@ -1109,10 +1151,125 @@ parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) | |||
1109 | return 0; | 1151 | return 0; |
1110 | } | 1152 | } |
1111 | 1153 | ||
1154 | static struct rb_root root_sym_ext; | ||
1155 | |||
1156 | static void insert_source_line(struct sym_ext *sym_ext) | ||
1157 | { | ||
1158 | struct sym_ext *iter; | ||
1159 | struct rb_node **p = &root_sym_ext.rb_node; | ||
1160 | struct rb_node *parent = NULL; | ||
1161 | |||
1162 | while (*p != NULL) { | ||
1163 | parent = *p; | ||
1164 | iter = rb_entry(parent, struct sym_ext, node); | ||
1165 | |||
1166 | if (sym_ext->percent > iter->percent) | ||
1167 | p = &(*p)->rb_left; | ||
1168 | else | ||
1169 | p = &(*p)->rb_right; | ||
1170 | } | ||
1171 | |||
1172 | rb_link_node(&sym_ext->node, parent, p); | ||
1173 | rb_insert_color(&sym_ext->node, &root_sym_ext); | ||
1174 | } | ||
1175 | |||
1176 | static void free_source_line(struct symbol *sym, int len) | ||
1177 | { | ||
1178 | struct sym_ext *sym_ext = sym->priv; | ||
1179 | int i; | ||
1180 | |||
1181 | if (!sym_ext) | ||
1182 | return; | ||
1183 | |||
1184 | for (i = 0; i < len; i++) | ||
1185 | free(sym_ext[i].path); | ||
1186 | free(sym_ext); | ||
1187 | |||
1188 | sym->priv = NULL; | ||
1189 | root_sym_ext = RB_ROOT; | ||
1190 | } | ||
1191 | |||
1192 | /* Get the filename:line for the colored entries */ | ||
1193 | static void | ||
1194 | get_source_line(struct symbol *sym, u64 start, int len, char *filename) | ||
1195 | { | ||
1196 | int i; | ||
1197 | char cmd[PATH_MAX * 2]; | ||
1198 | struct sym_ext *sym_ext; | ||
1199 | |||
1200 | if (!sym->hist_sum) | ||
1201 | return; | ||
1202 | |||
1203 | sym->priv = calloc(len, sizeof(struct sym_ext)); | ||
1204 | if (!sym->priv) | ||
1205 | return; | ||
1206 | |||
1207 | sym_ext = sym->priv; | ||
1208 | |||
1209 | for (i = 0; i < len; i++) { | ||
1210 | char *path = NULL; | ||
1211 | size_t line_len; | ||
1212 | u64 offset; | ||
1213 | FILE *fp; | ||
1214 | |||
1215 | sym_ext[i].percent = 100.0 * sym->hist[i] / sym->hist_sum; | ||
1216 | if (sym_ext[i].percent <= MIN_GREEN) | ||
1217 | continue; | ||
1218 | |||
1219 | offset = start + i; | ||
1220 | sprintf(cmd, "addr2line -e %s %016llx", filename, offset); | ||
1221 | fp = popen(cmd, "r"); | ||
1222 | if (!fp) | ||
1223 | continue; | ||
1224 | |||
1225 | if (getline(&path, &line_len, fp) < 0 || !line_len) | ||
1226 | goto next; | ||
1227 | |||
1228 | sym_ext[i].path = malloc(line_len + 1); | ||
1229 | if (!sym_ext[i].path) | ||
1230 | goto next; | ||
1231 | |||
1232 | strcpy(sym_ext[i].path, path); | ||
1233 | insert_source_line(&sym_ext[i]); | ||
1234 | |||
1235 | next: | ||
1236 | pclose(fp); | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | static void print_summary(char *filename) | ||
1241 | { | ||
1242 | struct sym_ext *sym_ext; | ||
1243 | struct rb_node *node; | ||
1244 | |||
1245 | printf("\nSorted summary for file %s\n", filename); | ||
1246 | printf("----------------------------------------------\n\n"); | ||
1247 | |||
1248 | if (RB_EMPTY_ROOT(&root_sym_ext)) { | ||
1249 | printf(" Nothing higher than %1.1f%%\n", MIN_GREEN); | ||
1250 | return; | ||
1251 | } | ||
1252 | |||
1253 | node = rb_first(&root_sym_ext); | ||
1254 | while (node) { | ||
1255 | double percent; | ||
1256 | char *color; | ||
1257 | char *path; | ||
1258 | |||
1259 | sym_ext = rb_entry(node, struct sym_ext, node); | ||
1260 | percent = sym_ext->percent; | ||
1261 | color = get_color(percent); | ||
1262 | path = sym_ext->path; | ||
1263 | |||
1264 | color_fprintf(stdout, color, " %7.2f %s", percent, path); | ||
1265 | node = rb_next(node); | ||
1266 | } | ||
1267 | } | ||
1268 | |||
1112 | static void annotate_sym(struct dso *dso, struct symbol *sym) | 1269 | static void annotate_sym(struct dso *dso, struct symbol *sym) |
1113 | { | 1270 | { |
1114 | char *filename = dso->name; | 1271 | char *filename = dso->name; |
1115 | __u64 start, end, len; | 1272 | u64 start, end, len; |
1116 | char command[PATH_MAX*2]; | 1273 | char command[PATH_MAX*2]; |
1117 | FILE *file; | 1274 | FILE *file; |
1118 | 1275 | ||
@@ -1121,13 +1278,6 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) | |||
1121 | if (dso == kernel_dso) | 1278 | if (dso == kernel_dso) |
1122 | filename = vmlinux; | 1279 | filename = vmlinux; |
1123 | 1280 | ||
1124 | printf("\n------------------------------------------------\n"); | ||
1125 | printf(" Percent | Source code & Disassembly of %s\n", filename); | ||
1126 | printf("------------------------------------------------\n"); | ||
1127 | |||
1128 | if (verbose >= 2) | ||
1129 | printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); | ||
1130 | |||
1131 | start = sym->obj_start; | 1281 | start = sym->obj_start; |
1132 | if (!start) | 1282 | if (!start) |
1133 | start = sym->start; | 1283 | start = sym->start; |
@@ -1135,7 +1285,19 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) | |||
1135 | end = start + sym->end - sym->start + 1; | 1285 | end = start + sym->end - sym->start + 1; |
1136 | len = sym->end - sym->start; | 1286 | len = sym->end - sym->start; |
1137 | 1287 | ||
1138 | sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); | 1288 | if (print_line) { |
1289 | get_source_line(sym, start, len, filename); | ||
1290 | print_summary(filename); | ||
1291 | } | ||
1292 | |||
1293 | printf("\n\n------------------------------------------------\n"); | ||
1294 | printf(" Percent | Source code & Disassembly of %s\n", filename); | ||
1295 | printf("------------------------------------------------\n"); | ||
1296 | |||
1297 | if (verbose >= 2) | ||
1298 | printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); | ||
1299 | |||
1300 | sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (u64)start, (u64)end, filename); | ||
1139 | 1301 | ||
1140 | if (verbose >= 3) | 1302 | if (verbose >= 3) |
1141 | printf("doing: %s\n", command); | 1303 | printf("doing: %s\n", command); |
@@ -1150,6 +1312,8 @@ static void annotate_sym(struct dso *dso, struct symbol *sym) | |||
1150 | } | 1312 | } |
1151 | 1313 | ||
1152 | pclose(file); | 1314 | pclose(file); |
1315 | if (print_line) | ||
1316 | free_source_line(sym, len); | ||
1153 | } | 1317 | } |
1154 | 1318 | ||
1155 | static void find_annotations(void) | 1319 | static void find_annotations(void) |
@@ -1308,6 +1472,8 @@ static const struct option options[] = { | |||
1308 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | 1472 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, |
1309 | "dump raw trace in ASCII"), | 1473 | "dump raw trace in ASCII"), |
1310 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), | 1474 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), |
1475 | OPT_BOOLEAN('l', "print-line", &print_line, | ||
1476 | "print matching source lines (may be slow)"), | ||
1311 | OPT_END() | 1477 | OPT_END() |
1312 | }; | 1478 | }; |
1313 | 1479 | ||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0f5771f615da..d7ebbd757543 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -37,33 +37,37 @@ static pid_t target_pid = -1; | |||
37 | static int inherit = 1; | 37 | static int inherit = 1; |
38 | static int force = 0; | 38 | static int force = 0; |
39 | static int append_file = 0; | 39 | static int append_file = 0; |
40 | static int call_graph = 0; | ||
40 | static int verbose = 0; | 41 | static int verbose = 0; |
41 | 42 | ||
42 | static long samples; | 43 | static long samples; |
43 | static struct timeval last_read; | 44 | static struct timeval last_read; |
44 | static struct timeval this_read; | 45 | static struct timeval this_read; |
45 | 46 | ||
46 | static __u64 bytes_written; | 47 | static u64 bytes_written; |
47 | 48 | ||
48 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | 49 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; |
49 | 50 | ||
50 | static int nr_poll; | 51 | static int nr_poll; |
51 | static int nr_cpu; | 52 | static int nr_cpu; |
52 | 53 | ||
54 | static int file_new = 1; | ||
55 | static struct perf_file_header file_header; | ||
56 | |||
53 | struct mmap_event { | 57 | struct mmap_event { |
54 | struct perf_event_header header; | 58 | struct perf_event_header header; |
55 | __u32 pid; | 59 | u32 pid; |
56 | __u32 tid; | 60 | u32 tid; |
57 | __u64 start; | 61 | u64 start; |
58 | __u64 len; | 62 | u64 len; |
59 | __u64 pgoff; | 63 | u64 pgoff; |
60 | char filename[PATH_MAX]; | 64 | char filename[PATH_MAX]; |
61 | }; | 65 | }; |
62 | 66 | ||
63 | struct comm_event { | 67 | struct comm_event { |
64 | struct perf_event_header header; | 68 | struct perf_event_header header; |
65 | __u32 pid; | 69 | u32 pid; |
66 | __u32 tid; | 70 | u32 tid; |
67 | char comm[16]; | 71 | char comm[16]; |
68 | }; | 72 | }; |
69 | 73 | ||
@@ -77,10 +81,10 @@ struct mmap_data { | |||
77 | 81 | ||
78 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | 82 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; |
79 | 83 | ||
80 | static unsigned int mmap_read_head(struct mmap_data *md) | 84 | static unsigned long mmap_read_head(struct mmap_data *md) |
81 | { | 85 | { |
82 | struct perf_counter_mmap_page *pc = md->base; | 86 | struct perf_counter_mmap_page *pc = md->base; |
83 | int head; | 87 | long head; |
84 | 88 | ||
85 | head = pc->data_head; | 89 | head = pc->data_head; |
86 | rmb(); | 90 | rmb(); |
@@ -88,6 +92,32 @@ static unsigned int mmap_read_head(struct mmap_data *md) | |||
88 | return head; | 92 | return head; |
89 | } | 93 | } |
90 | 94 | ||
95 | static void mmap_write_tail(struct mmap_data *md, unsigned long tail) | ||
96 | { | ||
97 | struct perf_counter_mmap_page *pc = md->base; | ||
98 | |||
99 | /* | ||
100 | * ensure all reads are done before we write the tail out. | ||
101 | */ | ||
102 | /* mb(); */ | ||
103 | pc->data_tail = tail; | ||
104 | } | ||
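
mmap_write_tail() is the consumer half of the new read/write mmap protocol: writing data_tail back through the shared control page tells the kernel how far userspace has read, so it can safely reuse that part of the ring. The commented-out mb() marks where a full barrier belongs, so that all reads of the ring contents complete before the tail store becomes visible; the commit leaves it out, which is tolerable on x86's strong ordering but not in general. A hedged sketch with the barrier spelled out via the GCC builtin, reusing the patch's types:

/* Sketch: publish the consumer position with an explicit full barrier. */
static void mmap_write_tail_strict(struct mmap_data *md, unsigned long tail)
{
	struct perf_counter_mmap_page *pc = md->base;

	__sync_synchronize();	/* all reads of ring data happen before this */
	pc->data_tail = tail;	/* kernel may now overwrite the consumed space */
}
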
105 | |||
106 | static void write_output(void *buf, size_t size) | ||
107 | { | ||
108 | while (size) { | ||
109 | int ret = write(output, buf, size); | ||
110 | |||
111 | if (ret < 0) | ||
112 | die("failed to write"); | ||
113 | |||
114 | size -= ret; | ||
115 | buf += ret; | ||
116 | |||
117 | bytes_written += ret; | ||
118 | } | ||
119 | } | ||
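
write_output() factors out the two identical flush loops that mmap_read() used to carry: write(2) may accept fewer bytes than asked, so the remainder is resubmitted until the buffer drains, and bytes_written accumulates for the header fix-up at exit. A slightly more defensive variant (an illustration, not the committed helper) would also retry when the call is interrupted by a signal:

/* Sketch: write-all loop tolerating short writes and EINTR. */
#include <errno.h>
#include <unistd.h>

static int write_all(int fd, const void *buf, size_t size)
{
	const char *p = buf;

	while (size) {
		ssize_t ret = write(fd, p, size);

		if (ret < 0) {
			if (errno == EINTR)
				continue;	/* interrupted, just retry */
			return -1;
		}
		p += ret;
		size -= ret;
	}
	return 0;
}
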
120 | |||
91 | static void mmap_read(struct mmap_data *md) | 121 | static void mmap_read(struct mmap_data *md) |
92 | { | 122 | { |
93 | unsigned int head = mmap_read_head(md); | 123 | unsigned int head = mmap_read_head(md); |
@@ -108,7 +138,7 @@ static void mmap_read(struct mmap_data *md) | |||
108 | * In either case, truncate and restart at head. | 138 | * In either case, truncate and restart at head. |
109 | */ | 139 | */ |
110 | diff = head - old; | 140 | diff = head - old; |
111 | if (diff > md->mask / 2 || diff < 0) { | 141 | if (diff < 0) { |
112 | struct timeval iv; | 142 | struct timeval iv; |
113 | unsigned long msecs; | 143 | unsigned long msecs; |
114 | 144 | ||
@@ -136,36 +166,17 @@ static void mmap_read(struct mmap_data *md) | |||
136 | size = md->mask + 1 - (old & md->mask); | 166 | size = md->mask + 1 - (old & md->mask); |
137 | old += size; | 167 | old += size; |
138 | 168 | ||
139 | while (size) { | 169 | write_output(buf, size); |
140 | int ret = write(output, buf, size); | ||
141 | |||
142 | if (ret < 0) | ||
143 | die("failed to write"); | ||
144 | |||
145 | size -= ret; | ||
146 | buf += ret; | ||
147 | |||
148 | bytes_written += ret; | ||
149 | } | ||
150 | } | 170 | } |
151 | 171 | ||
152 | buf = &data[old & md->mask]; | 172 | buf = &data[old & md->mask]; |
153 | size = head - old; | 173 | size = head - old; |
154 | old += size; | 174 | old += size; |
155 | 175 | ||
156 | while (size) { | 176 | write_output(buf, size); |
157 | int ret = write(output, buf, size); | ||
158 | |||
159 | if (ret < 0) | ||
160 | die("failed to write"); | ||
161 | |||
162 | size -= ret; | ||
163 | buf += ret; | ||
164 | |||
165 | bytes_written += ret; | ||
166 | } | ||
167 | 177 | ||
168 | md->prev = old; | 178 | md->prev = old; |
179 | mmap_write_tail(md, old); | ||
169 | } | 180 | } |
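
The reworked mmap_read() now copies at most two chunks per call: when the valid range [old, head) wraps past the end of the power-of-two ring, the first write_output() drains up to the buffer end and the second restarts at offset zero. Note the overflow test also shrank from `diff > md->mask / 2 || diff < 0` to `diff < 0`: with the tail written back, the kernel stops overrunning unread data, so only a genuinely negative distance still signals trouble. The index arithmetic in isolation, as a sketch (mask is mmap_pages*page_size - 1, a power of two minus one):

/* Sketch: emit a [old, head) range of a power-of-two ring as 1 or 2 chunks. */
static void ring_emit(unsigned char *data, unsigned long mask,
		      unsigned long old, unsigned long head,
		      void (*emit)(void *buf, size_t size))
{
	if ((old & mask) + (head - old) > mask + 1) {
		/* wraps: drain to the physical end of the buffer first */
		size_t size = mask + 1 - (old & mask);

		emit(&data[old & mask], size);
		old += size;
	}
	emit(&data[old & mask], head - old);	/* remainder, from offset 0 */
}
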
170 | 181 | ||
171 | static volatile int done = 0; | 182 | static volatile int done = 0; |
@@ -191,7 +202,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) | |||
191 | struct comm_event comm_ev; | 202 | struct comm_event comm_ev; |
192 | char filename[PATH_MAX]; | 203 | char filename[PATH_MAX]; |
193 | char bf[BUFSIZ]; | 204 | char bf[BUFSIZ]; |
194 | int fd, ret; | 205 | int fd; |
195 | size_t size; | 206 | size_t size; |
196 | char *field, *sep; | 207 | char *field, *sep; |
197 | DIR *tasks; | 208 | DIR *tasks; |
@@ -201,8 +212,12 @@ static void pid_synthesize_comm_event(pid_t pid, int full) | |||
201 | 212 | ||
202 | fd = open(filename, O_RDONLY); | 213 | fd = open(filename, O_RDONLY); |
203 | if (fd < 0) { | 214 | if (fd < 0) { |
204 | fprintf(stderr, "couldn't open %s\n", filename); | 215 | /* |
205 | exit(EXIT_FAILURE); | 216 | * We raced with a task exiting - just return: |
217 | */ | ||
218 | if (verbose) | ||
219 | fprintf(stderr, "couldn't open %s\n", filename); | ||
220 | return; | ||
206 | } | 221 | } |
207 | if (read(fd, bf, sizeof(bf)) < 0) { | 222 | if (read(fd, bf, sizeof(bf)) < 0) { |
208 | fprintf(stderr, "couldn't read %s\n", filename); | 223 | fprintf(stderr, "couldn't read %s\n", filename); |
@@ -223,17 +238,13 @@ static void pid_synthesize_comm_event(pid_t pid, int full) | |||
223 | 238 | ||
224 | comm_ev.pid = pid; | 239 | comm_ev.pid = pid; |
225 | comm_ev.header.type = PERF_EVENT_COMM; | 240 | comm_ev.header.type = PERF_EVENT_COMM; |
226 | size = ALIGN(size, sizeof(__u64)); | 241 | size = ALIGN(size, sizeof(u64)); |
227 | comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); | 242 | comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); |
228 | 243 | ||
229 | if (!full) { | 244 | if (!full) { |
230 | comm_ev.tid = pid; | 245 | comm_ev.tid = pid; |
231 | 246 | ||
232 | ret = write(output, &comm_ev, comm_ev.header.size); | 247 | write_output(&comm_ev, comm_ev.header.size); |
233 | if (ret < 0) { | ||
234 | perror("failed to write"); | ||
235 | exit(-1); | ||
236 | } | ||
237 | return; | 248 | return; |
238 | } | 249 | } |
239 | 250 | ||
@@ -248,11 +259,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full) | |||
248 | 259 | ||
249 | comm_ev.tid = pid; | 260 | comm_ev.tid = pid; |
250 | 261 | ||
251 | ret = write(output, &comm_ev, comm_ev.header.size); | 262 | write_output(&comm_ev, comm_ev.header.size); |
252 | if (ret < 0) { | ||
253 | perror("failed to write"); | ||
254 | exit(-1); | ||
255 | } | ||
256 | } | 263 | } |
257 | closedir(tasks); | 264 | closedir(tasks); |
258 | return; | 265 | return; |
@@ -272,8 +279,12 @@ static void pid_synthesize_mmap_samples(pid_t pid) | |||
272 | 279 | ||
273 | fp = fopen(filename, "r"); | 280 | fp = fopen(filename, "r"); |
274 | if (fp == NULL) { | 281 | if (fp == NULL) { |
275 | fprintf(stderr, "couldn't open %s\n", filename); | 282 | /* |
276 | exit(EXIT_FAILURE); | 283 | * We raced with a task exiting - just return: |
284 | */ | ||
285 | if (verbose) | ||
286 | fprintf(stderr, "couldn't open %s\n", filename); | ||
287 | return; | ||
277 | } | 288 | } |
278 | while (1) { | 289 | while (1) { |
279 | char bf[BUFSIZ], *pbf = bf; | 290 | char bf[BUFSIZ], *pbf = bf; |
@@ -304,17 +315,14 @@ static void pid_synthesize_mmap_samples(pid_t pid) | |||
304 | size = strlen(execname); | 315 | size = strlen(execname); |
305 | execname[size - 1] = '\0'; /* Remove \n */ | 316 | execname[size - 1] = '\0'; /* Remove \n */ |
306 | memcpy(mmap_ev.filename, execname, size); | 317 | memcpy(mmap_ev.filename, execname, size); |
307 | size = ALIGN(size, sizeof(__u64)); | 318 | size = ALIGN(size, sizeof(u64)); |
308 | mmap_ev.len -= mmap_ev.start; | 319 | mmap_ev.len -= mmap_ev.start; |
309 | mmap_ev.header.size = (sizeof(mmap_ev) - | 320 | mmap_ev.header.size = (sizeof(mmap_ev) - |
310 | (sizeof(mmap_ev.filename) - size)); | 321 | (sizeof(mmap_ev.filename) - size)); |
311 | mmap_ev.pid = pid; | 322 | mmap_ev.pid = pid; |
312 | mmap_ev.tid = pid; | 323 | mmap_ev.tid = pid; |
313 | 324 | ||
314 | if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { | 325 | write_output(&mmap_ev, mmap_ev.header.size); |
315 | perror("failed to write"); | ||
316 | exit(-1); | ||
317 | } | ||
318 | } | 326 | } |
319 | } | 327 | } |
320 | 328 | ||
@@ -351,11 +359,25 @@ static void create_counter(int counter, int cpu, pid_t pid) | |||
351 | int track = 1; | 359 | int track = 1; |
352 | 360 | ||
353 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | 361 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; |
362 | |||
354 | if (freq) { | 363 | if (freq) { |
355 | attr->sample_type |= PERF_SAMPLE_PERIOD; | 364 | attr->sample_type |= PERF_SAMPLE_PERIOD; |
356 | attr->freq = 1; | 365 | attr->freq = 1; |
357 | attr->sample_freq = freq; | 366 | attr->sample_freq = freq; |
358 | } | 367 | } |
368 | |||
369 | if (call_graph) | ||
370 | attr->sample_type |= PERF_SAMPLE_CALLCHAIN; | ||
371 | |||
372 | if (file_new) { | ||
373 | file_header.sample_type = attr->sample_type; | ||
374 | } else { | ||
375 | if (file_header.sample_type != attr->sample_type) { | ||
376 | fprintf(stderr, "incompatible append\n"); | ||
377 | exit(-1); | ||
378 | } | ||
379 | } | ||
380 | |||
359 | attr->mmap = track; | 381 | attr->mmap = track; |
360 | attr->comm = track; | 382 | attr->comm = track; |
361 | attr->inherit = (cpu < 0) && inherit; | 383 | attr->inherit = (cpu < 0) && inherit; |
@@ -410,7 +432,7 @@ try_again: | |||
410 | mmap_array[nr_cpu][counter].prev = 0; | 432 | mmap_array[nr_cpu][counter].prev = 0; |
411 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | 433 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; |
412 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | 434 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, |
413 | PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); | 435 | PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); |
414 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | 436 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { |
415 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | 437 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); |
416 | exit(-1); | 438 | exit(-1); |
@@ -435,6 +457,14 @@ static void open_counters(int cpu, pid_t pid) | |||
435 | nr_cpu++; | 457 | nr_cpu++; |
436 | } | 458 | } |
437 | 459 | ||
460 | static void atexit_header(void) | ||
461 | { | ||
462 | file_header.data_size += bytes_written; | ||
463 | |||
464 | if (pwrite(output, &file_header, sizeof(file_header), 0) == -1) | ||
465 | perror("failed to write on file headers"); | ||
466 | } | ||
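
atexit_header() keeps the new on-disk header honest without any mid-run seeking: events are only ever appended, and at exit the accumulated bytes_written is folded into data_size and the header rewritten in place at offset 0. pwrite() is the right tool because it writes at an absolute offset without disturbing the current file position. A usage sketch with a stand-in header (perf_file_header is assumed to carry at least sample_type and data_size, as this series defines it):

/* Sketch: fix up a fixed-size header at the front of an append-only file. */
#include <stdio.h>
#include <unistd.h>

struct file_hdr {			/* stand-in for perf_file_header */
	unsigned long long sample_type;
	unsigned long long data_size;
};

static void finalize_header(int fd, struct file_hdr *hdr,
			    unsigned long long bytes_written)
{
	hdr->data_size += bytes_written;

	if (pwrite(fd, hdr, sizeof(*hdr), 0) == -1)	/* offset 0, fd pos untouched */
		perror("failed to write file header");
}
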
467 | |||
438 | static int __cmd_record(int argc, const char **argv) | 468 | static int __cmd_record(int argc, const char **argv) |
439 | { | 469 | { |
440 | int i, counter; | 470 | int i, counter; |
@@ -448,6 +478,10 @@ static int __cmd_record(int argc, const char **argv) | |||
448 | assert(nr_cpus <= MAX_NR_CPUS); | 478 | assert(nr_cpus <= MAX_NR_CPUS); |
449 | assert(nr_cpus >= 0); | 479 | assert(nr_cpus >= 0); |
450 | 480 | ||
481 | atexit(sig_atexit); | ||
482 | signal(SIGCHLD, sig_handler); | ||
483 | signal(SIGINT, sig_handler); | ||
484 | |||
451 | if (!stat(output_name, &st) && !force && !append_file) { | 485 | if (!stat(output_name, &st) && !force && !append_file) { |
452 | fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", | 486 | fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", |
453 | output_name); | 487 | output_name); |
@@ -456,7 +490,7 @@ static int __cmd_record(int argc, const char **argv) | |||
456 | 490 | ||
457 | flags = O_CREAT|O_RDWR; | 491 | flags = O_CREAT|O_RDWR; |
458 | if (append_file) | 492 | if (append_file) |
459 | flags |= O_APPEND; | 493 | file_new = 0; |
460 | else | 494 | else |
461 | flags |= O_TRUNC; | 495 | flags |= O_TRUNC; |
462 | 496 | ||
@@ -466,15 +500,22 @@ static int __cmd_record(int argc, const char **argv) | |||
466 | exit(-1); | 500 | exit(-1); |
467 | } | 501 | } |
468 | 502 | ||
503 | if (!file_new) { | ||
504 | if (read(output, &file_header, sizeof(file_header)) == -1) { | ||
505 | perror("failed to read file headers"); | ||
506 | exit(-1); | ||
507 | } | ||
508 | |||
509 | lseek(output, file_header.data_size, SEEK_CUR); | ||
510 | } | ||
511 | |||
512 | atexit(atexit_header); | ||
513 | |||
469 | if (!system_wide) { | 514 | if (!system_wide) { |
470 | open_counters(-1, target_pid != -1 ? target_pid : getpid()); | 515 | open_counters(-1, target_pid != -1 ? target_pid : getpid()); |
471 | } else for (i = 0; i < nr_cpus; i++) | 516 | } else for (i = 0; i < nr_cpus; i++) |
472 | open_counters(i, target_pid); | 517 | open_counters(i, target_pid); |
473 | 518 | ||
474 | atexit(sig_atexit); | ||
475 | signal(SIGCHLD, sig_handler); | ||
476 | signal(SIGINT, sig_handler); | ||
477 | |||
478 | if (target_pid == -1 && argc) { | 519 | if (target_pid == -1 && argc) { |
479 | pid = fork(); | 520 | pid = fork(); |
480 | if (pid < 0) | 521 | if (pid < 0) |
@@ -555,6 +596,8 @@ static const struct option options[] = { | |||
555 | "profile at this frequency"), | 596 | "profile at this frequency"), |
556 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, | 597 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, |
557 | "number of mmap data pages"), | 598 | "number of mmap data pages"), |
599 | OPT_BOOLEAN('g', "call-graph", &call_graph, | ||
600 | "do call-graph (stack chain/backtrace) recording"), | ||
558 | OPT_BOOLEAN('v', "verbose", &verbose, | 601 | OPT_BOOLEAN('v', "verbose", &verbose, |
559 | "be more verbose (show counter open errors, etc)"), | 602 | "be more verbose (show counter open errors, etc)"), |
560 | OPT_END() | 603 | OPT_END() |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 82fa93b4db99..5eb5566f0c95 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -36,45 +36,65 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; | |||
36 | 36 | ||
37 | static int dump_trace = 0; | 37 | static int dump_trace = 0; |
38 | #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) | 38 | #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) |
39 | #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) | ||
39 | 40 | ||
40 | static int verbose; | 41 | static int verbose; |
42 | #define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) | ||
43 | |||
41 | static int full_paths; | 44 | static int full_paths; |
42 | 45 | ||
43 | static unsigned long page_size; | 46 | static unsigned long page_size; |
44 | static unsigned long mmap_window = 32; | 47 | static unsigned long mmap_window = 32; |
45 | 48 | ||
49 | static char default_parent_pattern[] = "^sys_|^do_page_fault"; | ||
50 | static char *parent_pattern = default_parent_pattern; | ||
51 | static regex_t parent_regex; | ||
52 | |||
53 | static int exclude_other = 1; | ||
54 | |||
46 | struct ip_event { | 55 | struct ip_event { |
47 | struct perf_event_header header; | 56 | struct perf_event_header header; |
48 | __u64 ip; | 57 | u64 ip; |
49 | __u32 pid, tid; | 58 | u32 pid, tid; |
50 | __u64 period; | 59 | unsigned char __more_data[]; |
60 | }; | ||
61 | |||
62 | struct ip_callchain { | ||
63 | u64 nr; | ||
64 | u64 ips[0]; | ||
51 | }; | 65 | }; |
52 | 66 | ||
53 | struct mmap_event { | 67 | struct mmap_event { |
54 | struct perf_event_header header; | 68 | struct perf_event_header header; |
55 | __u32 pid, tid; | 69 | u32 pid, tid; |
56 | __u64 start; | 70 | u64 start; |
57 | __u64 len; | 71 | u64 len; |
58 | __u64 pgoff; | 72 | u64 pgoff; |
59 | char filename[PATH_MAX]; | 73 | char filename[PATH_MAX]; |
60 | }; | 74 | }; |
61 | 75 | ||
62 | struct comm_event { | 76 | struct comm_event { |
63 | struct perf_event_header header; | 77 | struct perf_event_header header; |
64 | __u32 pid, tid; | 78 | u32 pid, tid; |
65 | char comm[16]; | 79 | char comm[16]; |
66 | }; | 80 | }; |
67 | 81 | ||
68 | struct fork_event { | 82 | struct fork_event { |
69 | struct perf_event_header header; | 83 | struct perf_event_header header; |
70 | __u32 pid, ppid; | 84 | u32 pid, ppid; |
71 | }; | 85 | }; |
72 | 86 | ||
73 | struct period_event { | 87 | struct period_event { |
74 | struct perf_event_header header; | 88 | struct perf_event_header header; |
75 | __u64 time; | 89 | u64 time; |
76 | __u64 id; | 90 | u64 id; |
77 | __u64 sample_period; | 91 | u64 sample_period; |
92 | }; | ||
93 | |||
94 | struct lost_event { | ||
95 | struct perf_event_header header; | ||
96 | u64 id; | ||
97 | u64 lost; | ||
78 | }; | 98 | }; |
79 | 99 | ||
80 | typedef union event_union { | 100 | typedef union event_union { |
@@ -84,6 +104,7 @@ typedef union event_union { | |||
84 | struct comm_event comm; | 104 | struct comm_event comm; |
85 | struct fork_event fork; | 105 | struct fork_event fork; |
86 | struct period_event period; | 106 | struct period_event period; |
107 | struct lost_event lost; | ||
87 | } event_t; | 108 | } event_t; |
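
The union gives every record in the stream a single overlay type: each record begins with struct perf_event_header, so a reader switches on header.type to pick the member and uses header.size to advance. A minimal dispatch sketch over a raw byte buffer (the walk itself is illustrative; only the header-first layout from the patch is assumed):

/* Sketch: walk variable-size records that each start with a header. */
static void walk_events(unsigned char *buf, size_t len,
			int (*handle)(event_t *event))
{
	size_t off = 0;

	while (off + sizeof(struct perf_event_header) <= len) {
		event_t *event = (event_t *)&buf[off];

		if (!event->header.size || off + event->header.size > len)
			break;		/* truncated or corrupt record */
		handle(event);		/* e.g. switch (event->header.type) */
		off += event->header.size;
	}
}
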
88 | 109 | ||
89 | static LIST_HEAD(dsos); | 110 | static LIST_HEAD(dsos); |
@@ -119,15 +140,11 @@ static struct dso *dsos__findnew(const char *name) | |||
119 | 140 | ||
120 | nr = dso__load(dso, NULL, verbose); | 141 | nr = dso__load(dso, NULL, verbose); |
121 | if (nr < 0) { | 142 | if (nr < 0) { |
122 | if (verbose) | 143 | eprintf("Failed to open: %s\n", name); |
123 | fprintf(stderr, "Failed to open: %s\n", name); | ||
124 | goto out_delete_dso; | 144 | goto out_delete_dso; |
125 | } | 145 | } |
126 | if (!nr && verbose) { | 146 | if (!nr) |
127 | fprintf(stderr, | 147 | eprintf("No symbols found in: %s, maybe install a debug package?\n", name); |
128 | "No symbols found in: %s, maybe install a debug package?\n", | ||
129 | name); | ||
130 | } | ||
131 | 148 | ||
132 | dsos__add(dso); | 149 | dsos__add(dso); |
133 | 150 | ||
@@ -146,7 +163,7 @@ static void dsos__fprintf(FILE *fp) | |||
146 | dso__fprintf(pos, fp); | 163 | dso__fprintf(pos, fp); |
147 | } | 164 | } |
148 | 165 | ||
149 | static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) | 166 | static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) |
150 | { | 167 | { |
151 | return dso__find_symbol(kernel_dso, ip); | 168 | return dso__find_symbol(kernel_dso, ip); |
152 | } | 169 | } |
@@ -193,19 +210,19 @@ static int strcommon(const char *pathname) | |||
193 | 210 | ||
194 | struct map { | 211 | struct map { |
195 | struct list_head node; | 212 | struct list_head node; |
196 | __u64 start; | 213 | u64 start; |
197 | __u64 end; | 214 | u64 end; |
198 | __u64 pgoff; | 215 | u64 pgoff; |
199 | __u64 (*map_ip)(struct map *, __u64); | 216 | u64 (*map_ip)(struct map *, u64); |
200 | struct dso *dso; | 217 | struct dso *dso; |
201 | }; | 218 | }; |
202 | 219 | ||
203 | static __u64 map__map_ip(struct map *map, __u64 ip) | 220 | static u64 map__map_ip(struct map *map, u64 ip) |
204 | { | 221 | { |
205 | return ip - map->start + map->pgoff; | 222 | return ip - map->start + map->pgoff; |
206 | } | 223 | } |
207 | 224 | ||
208 | static __u64 vdso__map_ip(struct map *map, __u64 ip) | 225 | static u64 vdso__map_ip(struct map *map, u64 ip) |
209 | { | 226 | { |
210 | return ip; | 227 | return ip; |
211 | } | 228 | } |
@@ -412,7 +429,7 @@ static int thread__fork(struct thread *self, struct thread *parent) | |||
412 | return 0; | 429 | return 0; |
413 | } | 430 | } |
414 | 431 | ||
415 | static struct map *thread__find_map(struct thread *self, __u64 ip) | 432 | static struct map *thread__find_map(struct thread *self, u64 ip) |
416 | { | 433 | { |
417 | struct map *pos; | 434 | struct map *pos; |
418 | 435 | ||
@@ -453,10 +470,11 @@ struct hist_entry { | |||
453 | struct map *map; | 470 | struct map *map; |
454 | struct dso *dso; | 471 | struct dso *dso; |
455 | struct symbol *sym; | 472 | struct symbol *sym; |
456 | __u64 ip; | 473 | struct symbol *parent; |
474 | u64 ip; | ||
457 | char level; | 475 | char level; |
458 | 476 | ||
459 | __u64 count; | 477 | u64 count; |
460 | }; | 478 | }; |
461 | 479 | ||
462 | /* | 480 | /* |
@@ -473,6 +491,16 @@ struct sort_entry { | |||
473 | size_t (*print)(FILE *fp, struct hist_entry *); | 491 | size_t (*print)(FILE *fp, struct hist_entry *); |
474 | }; | 492 | }; |
475 | 493 | ||
494 | static int64_t cmp_null(void *l, void *r) | ||
495 | { | ||
496 | if (!l && !r) | ||
497 | return 0; | ||
498 | else if (!l) | ||
499 | return -1; | ||
500 | else | ||
501 | return 1; | ||
502 | } | ||
503 | |||
476 | /* --sort pid */ | 504 | /* --sort pid */ |
477 | 505 | ||
478 | static int64_t | 506 | static int64_t |
@@ -507,14 +535,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) | |||
507 | char *comm_l = left->thread->comm; | 535 | char *comm_l = left->thread->comm; |
508 | char *comm_r = right->thread->comm; | 536 | char *comm_r = right->thread->comm; |
509 | 537 | ||
510 | if (!comm_l || !comm_r) { | 538 | if (!comm_l || !comm_r) |
511 | if (!comm_l && !comm_r) | 539 | return cmp_null(comm_l, comm_r); |
512 | return 0; | ||
513 | else if (!comm_l) | ||
514 | return -1; | ||
515 | else | ||
516 | return 1; | ||
517 | } | ||
518 | 540 | ||
519 | return strcmp(comm_l, comm_r); | 541 | return strcmp(comm_l, comm_r); |
520 | } | 542 | } |
@@ -540,14 +562,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | |||
540 | struct dso *dso_l = left->dso; | 562 | struct dso *dso_l = left->dso; |
541 | struct dso *dso_r = right->dso; | 563 | struct dso *dso_r = right->dso; |
542 | 564 | ||
543 | if (!dso_l || !dso_r) { | 565 | if (!dso_l || !dso_r) |
544 | if (!dso_l && !dso_r) | 566 | return cmp_null(dso_l, dso_r); |
545 | return 0; | ||
546 | else if (!dso_l) | ||
547 | return -1; | ||
548 | else | ||
549 | return 1; | ||
550 | } | ||
551 | 567 | ||
552 | return strcmp(dso_l->name, dso_r->name); | 568 | return strcmp(dso_l->name, dso_r->name); |
553 | } | 569 | } |
@@ -558,7 +574,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) | |||
558 | if (self->dso) | 574 | if (self->dso) |
559 | return fprintf(fp, "%-25s", self->dso->name); | 575 | return fprintf(fp, "%-25s", self->dso->name); |
560 | 576 | ||
561 | return fprintf(fp, "%016llx ", (__u64)self->ip); | 577 | return fprintf(fp, "%016llx ", (u64)self->ip); |
562 | } | 578 | } |
563 | 579 | ||
564 | static struct sort_entry sort_dso = { | 580 | static struct sort_entry sort_dso = { |
@@ -572,7 +588,7 @@ static struct sort_entry sort_dso = { | |||
572 | static int64_t | 588 | static int64_t |
573 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | 589 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) |
574 | { | 590 | { |
575 | __u64 ip_l, ip_r; | 591 | u64 ip_l, ip_r; |
576 | 592 | ||
577 | if (left->sym == right->sym) | 593 | if (left->sym == right->sym) |
578 | return 0; | 594 | return 0; |
@@ -589,13 +605,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) | |||
589 | size_t ret = 0; | 605 | size_t ret = 0; |
590 | 606 | ||
591 | if (verbose) | 607 | if (verbose) |
592 | ret += fprintf(fp, "%#018llx ", (__u64)self->ip); | 608 | ret += fprintf(fp, "%#018llx ", (u64)self->ip); |
593 | 609 | ||
594 | if (self->sym) { | 610 | if (self->sym) { |
595 | ret += fprintf(fp, "[%c] %s", | 611 | ret += fprintf(fp, "[%c] %s", |
596 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); | 612 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); |
597 | } else { | 613 | } else { |
598 | ret += fprintf(fp, "%#016llx", (__u64)self->ip); | 614 | ret += fprintf(fp, "%#016llx", (u64)self->ip); |
599 | } | 615 | } |
600 | 616 | ||
601 | return ret; | 617 | return ret; |
@@ -607,7 +623,38 @@ static struct sort_entry sort_sym = { | |||
607 | .print = sort__sym_print, | 623 | .print = sort__sym_print, |
608 | }; | 624 | }; |
609 | 625 | ||
626 | /* --sort parent */ | ||
627 | |||
628 | static int64_t | ||
629 | sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) | ||
630 | { | ||
631 | struct symbol *sym_l = left->parent; | ||
632 | struct symbol *sym_r = right->parent; | ||
633 | |||
634 | if (!sym_l || !sym_r) | ||
635 | return cmp_null(sym_l, sym_r); | ||
636 | |||
637 | return strcmp(sym_l->name, sym_r->name); | ||
638 | } | ||
639 | |||
640 | static size_t | ||
641 | sort__parent_print(FILE *fp, struct hist_entry *self) | ||
642 | { | ||
643 | size_t ret = 0; | ||
644 | |||
645 | ret += fprintf(fp, "%-20s", self->parent ? self->parent->name : "[other]"); | ||
646 | |||
647 | return ret; | ||
648 | } | ||
649 | |||
650 | static struct sort_entry sort_parent = { | ||
651 | .header = "Parent symbol ", | ||
652 | .cmp = sort__parent_cmp, | ||
653 | .print = sort__parent_print, | ||
654 | }; | ||
655 | |||
610 | static int sort__need_collapse = 0; | 656 | static int sort__need_collapse = 0; |
657 | static int sort__has_parent = 0; | ||
611 | 658 | ||
612 | struct sort_dimension { | 659 | struct sort_dimension { |
613 | char *name; | 660 | char *name; |
@@ -620,6 +667,7 @@ static struct sort_dimension sort_dimensions[] = { | |||
620 | { .name = "comm", .entry = &sort_comm, }, | 667 | { .name = "comm", .entry = &sort_comm, }, |
621 | { .name = "dso", .entry = &sort_dso, }, | 668 | { .name = "dso", .entry = &sort_dso, }, |
622 | { .name = "symbol", .entry = &sort_sym, }, | 669 | { .name = "symbol", .entry = &sort_sym, }, |
670 | { .name = "parent", .entry = &sort_parent, }, | ||
623 | }; | 671 | }; |
624 | 672 | ||
625 | static LIST_HEAD(hist_entry__sort_list); | 673 | static LIST_HEAD(hist_entry__sort_list); |
@@ -640,6 +688,19 @@ static int sort_dimension__add(char *tok) | |||
640 | if (sd->entry->collapse) | 688 | if (sd->entry->collapse) |
641 | sort__need_collapse = 1; | 689 | sort__need_collapse = 1; |
642 | 690 | ||
691 | if (sd->entry == &sort_parent) { | ||
692 | int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); | ||
693 | if (ret) { | ||
694 | char err[BUFSIZ]; | ||
695 | |||
696 | regerror(ret, &parent_regex, err, sizeof(err)); | ||
697 | fprintf(stderr, "Invalid regex: %s\n%s", | ||
698 | parent_pattern, err); | ||
699 | exit(-1); | ||
700 | } | ||
701 | sort__has_parent = 1; | ||
702 | } | ||
703 | |||
643 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | 704 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); |
644 | sd->taken = 1; | 705 | sd->taken = 1; |
645 | 706 | ||
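
parent is the one sort dimension with setup cost: the --parent pattern is compiled once with regcomp() here, so call__match() can later run regexec() per symbol without recompiling. The same two-phase use in isolation (the default pattern "^sys_|^do_page_fault" is real; the helper names are illustrative):

/* Sketch: compile a POSIX ERE once, match many times cheaply. */
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>

static regex_t preg;

static void pattern_init(const char *pattern)	/* e.g. "^sys_|^do_page_fault" */
{
	int err = regcomp(&preg, pattern, REG_EXTENDED);

	if (err) {
		char buf[BUFSIZ];

		regerror(err, &preg, buf, sizeof(buf));
		fprintf(stderr, "Invalid regex: %s\n%s", pattern, buf);
		exit(-1);
	}
}

static int pattern_match(const char *name)
{
	return name && !regexec(&preg, name, 0, NULL, 0);
}
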
@@ -684,11 +745,14 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | |||
684 | } | 745 | } |
685 | 746 | ||
686 | static size_t | 747 | static size_t |
687 | hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) | 748 | hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) |
688 | { | 749 | { |
689 | struct sort_entry *se; | 750 | struct sort_entry *se; |
690 | size_t ret; | 751 | size_t ret; |
691 | 752 | ||
753 | if (exclude_other && !self->parent) | ||
754 | return 0; | ||
755 | |||
692 | if (total_samples) { | 756 | if (total_samples) { |
693 | double percent = self->count * 100.0 / total_samples; | 757 | double percent = self->count * 100.0 / total_samples; |
694 | char *color = PERF_COLOR_NORMAL; | 758 | char *color = PERF_COLOR_NORMAL; |
@@ -711,6 +775,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) | |||
711 | ret = fprintf(fp, "%12Ld ", self->count); | 775 | ret = fprintf(fp, "%12Ld ", self->count); |
712 | 776 | ||
713 | list_for_each_entry(se, &hist_entry__sort_list, list) { | 777 | list_for_each_entry(se, &hist_entry__sort_list, list) { |
778 | if (exclude_other && (se == &sort_parent)) | ||
779 | continue; | ||
780 | |||
714 | fprintf(fp, " "); | 781 | fprintf(fp, " "); |
715 | ret += se->print(fp, self); | 782 | ret += se->print(fp, self); |
716 | } | 783 | } |
@@ -721,12 +788,72 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) | |||
721 | } | 788 | } |
722 | 789 | ||
723 | /* | 790 | /* |
791 | * | ||
792 | */ | ||
793 | |||
794 | static struct symbol * | ||
795 | resolve_symbol(struct thread *thread, struct map **mapp, | ||
796 | struct dso **dsop, u64 *ipp) | ||
797 | { | ||
798 | struct dso *dso = dsop ? *dsop : NULL; | ||
799 | struct map *map = mapp ? *mapp : NULL; | ||
800 | uint64_t ip = *ipp; | ||
801 | |||
802 | if (!thread) | ||
803 | return NULL; | ||
804 | |||
805 | if (dso) | ||
806 | goto got_dso; | ||
807 | |||
808 | if (map) | ||
809 | goto got_map; | ||
810 | |||
811 | map = thread__find_map(thread, ip); | ||
812 | if (map != NULL) { | ||
813 | if (mapp) | ||
814 | *mapp = map; | ||
815 | got_map: | ||
816 | ip = map->map_ip(map, ip); | ||
817 | *ipp = ip; | ||
818 | |||
819 | dso = map->dso; | ||
820 | } else { | ||
821 | /* | ||
822 | * If this is outside of all known maps, | ||
823 | * and is a negative address, try to look it | ||
824 | * up in the kernel dso, as it might be a | ||
825 | * vsyscall (which executes in user-mode): | ||
826 | */ | ||
827 | if ((long long)ip < 0) | ||
828 | dso = kernel_dso; | ||
829 | } | ||
830 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); | ||
831 | |||
832 | if (dsop) | ||
833 | *dsop = dso; | ||
834 | |||
835 | if (!dso) | ||
836 | return NULL; | ||
837 | got_dso: | ||
838 | return dso->find_symbol(dso, ip); | ||
839 | } | ||
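
resolve_symbol() centralizes the map -> dso -> symbol lookup that process_overflow_event() used to open-code. mapp and dsop are optional in-out parameters: a caller may pin a known dso up front (the callchain walk passes kernel_dso for kernel context, jumping straight to got_dso) or let the thread's map list decide, and *ipp is rewritten to the map-relative address along the way. A call-site sketch, assuming the surrounding declarations from the patch (event_ip is a placeholder):

/* Sketch: the two call styles for resolve_symbol()'s in/out parameters. */
static void lookup_example(struct thread *thread, u64 event_ip)
{
	struct map *map = NULL;
	struct dso *dso = NULL;
	u64 ip = event_ip;
	struct symbol *sym;

	/* full lookup: fills map/dso, rewrites ip to a dso-relative address */
	sym = resolve_symbol(thread, &map, &dso, &ip);

	/* pinned dso (kernel context in a callchain): skips the map walk */
	dso = kernel_dso;
	sym = resolve_symbol(thread, NULL, &dso, &ip);
	(void)sym;
}
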
840 | |||
841 | static int call__match(struct symbol *sym) | ||
842 | { | ||
843 | if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) | ||
844 | return 1; | ||
845 | |||
846 | return 0; | ||
847 | } | ||
848 | |||
849 | /* | ||
724 | * collect histogram counts | 850 | * collect histogram counts |
725 | */ | 851 | */ |
726 | 852 | ||
727 | static int | 853 | static int |
728 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | 854 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, |
729 | struct symbol *sym, __u64 ip, char level, __u64 count) | 855 | struct symbol *sym, u64 ip, struct ip_callchain *chain, |
856 | char level, u64 count) | ||
730 | { | 857 | { |
731 | struct rb_node **p = &hist.rb_node; | 858 | struct rb_node **p = &hist.rb_node; |
732 | struct rb_node *parent = NULL; | 859 | struct rb_node *parent = NULL; |
@@ -739,9 +866,41 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | |||
739 | .ip = ip, | 866 | .ip = ip, |
740 | .level = level, | 867 | .level = level, |
741 | .count = count, | 868 | .count = count, |
869 | .parent = NULL, | ||
742 | }; | 870 | }; |
743 | int cmp; | 871 | int cmp; |
744 | 872 | ||
873 | if (sort__has_parent && chain) { | ||
874 | u64 context = PERF_CONTEXT_MAX; | ||
875 | int i; | ||
876 | |||
877 | for (i = 0; i < chain->nr; i++) { | ||
878 | u64 ip = chain->ips[i]; | ||
879 | struct dso *dso = NULL; | ||
880 | struct symbol *sym; | ||
881 | |||
882 | if (ip >= PERF_CONTEXT_MAX) { | ||
883 | context = ip; | ||
884 | continue; | ||
885 | } | ||
886 | |||
887 | switch (context) { | ||
888 | case PERF_CONTEXT_KERNEL: | ||
889 | dso = kernel_dso; | ||
890 | break; | ||
891 | default: | ||
892 | break; | ||
893 | } | ||
894 | |||
895 | sym = resolve_symbol(thread, NULL, &dso, &ip); | ||
896 | |||
897 | if (sym && call__match(sym)) { | ||
898 | entry.parent = sym; | ||
899 | break; | ||
900 | } | ||
901 | } | ||
902 | } | ||
903 | |||
745 | while (*p != NULL) { | 904 | while (*p != NULL) { |
746 | parent = *p; | 905 | parent = *p; |
747 | he = rb_entry(parent, struct hist_entry, rb_node); | 906 | he = rb_entry(parent, struct hist_entry, rb_node); |
@@ -873,7 +1032,7 @@ static void output__resort(void) | |||
873 | } | 1032 | } |
874 | } | 1033 | } |
875 | 1034 | ||
876 | static size_t output__fprintf(FILE *fp, __u64 total_samples) | 1035 | static size_t output__fprintf(FILE *fp, u64 total_samples) |
877 | { | 1036 | { |
878 | struct hist_entry *pos; | 1037 | struct hist_entry *pos; |
879 | struct sort_entry *se; | 1038 | struct sort_entry *se; |
@@ -882,18 +1041,24 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) | |||
882 | 1041 | ||
883 | fprintf(fp, "\n"); | 1042 | fprintf(fp, "\n"); |
884 | fprintf(fp, "#\n"); | 1043 | fprintf(fp, "#\n"); |
885 | fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); | 1044 | fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); |
886 | fprintf(fp, "#\n"); | 1045 | fprintf(fp, "#\n"); |
887 | 1046 | ||
888 | fprintf(fp, "# Overhead"); | 1047 | fprintf(fp, "# Overhead"); |
889 | list_for_each_entry(se, &hist_entry__sort_list, list) | 1048 | list_for_each_entry(se, &hist_entry__sort_list, list) { |
1049 | if (exclude_other && (se == &sort_parent)) | ||
1050 | continue; | ||
890 | fprintf(fp, " %s", se->header); | 1051 | fprintf(fp, " %s", se->header); |
1052 | } | ||
891 | fprintf(fp, "\n"); | 1053 | fprintf(fp, "\n"); |
892 | 1054 | ||
893 | fprintf(fp, "# ........"); | 1055 | fprintf(fp, "# ........"); |
894 | list_for_each_entry(se, &hist_entry__sort_list, list) { | 1056 | list_for_each_entry(se, &hist_entry__sort_list, list) { |
895 | int i; | 1057 | int i; |
896 | 1058 | ||
1059 | if (exclude_other && (se == &sort_parent)) | ||
1060 | continue; | ||
1061 | |||
897 | fprintf(fp, " "); | 1062 | fprintf(fp, " "); |
898 | for (i = 0; i < strlen(se->header); i++) | 1063 | for (i = 0; i < strlen(se->header); i++) |
899 | fprintf(fp, "."); | 1064 | fprintf(fp, "."); |
@@ -907,7 +1072,8 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) | |||
907 | ret += hist_entry__fprintf(fp, pos, total_samples); | 1072 | ret += hist_entry__fprintf(fp, pos, total_samples); |
908 | } | 1073 | } |
909 | 1074 | ||
910 | if (!strcmp(sort_order, default_sort_order)) { | 1075 | if (sort_order == default_sort_order && |
1076 | parent_pattern == default_parent_pattern) { | ||
911 | fprintf(fp, "#\n"); | 1077 | fprintf(fp, "#\n"); |
912 | fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); | 1078 | fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); |
913 | fprintf(fp, "#\n"); | 1079 | fprintf(fp, "#\n"); |
@@ -932,7 +1098,21 @@ static unsigned long total = 0, | |||
932 | total_mmap = 0, | 1098 | total_mmap = 0, |
933 | total_comm = 0, | 1099 | total_comm = 0, |
934 | total_fork = 0, | 1100 | total_fork = 0, |
935 | total_unknown = 0; | 1101 | total_unknown = 0, |
1102 | total_lost = 0; | ||
1103 | |||
1104 | static int validate_chain(struct ip_callchain *chain, event_t *event) | ||
1105 | { | ||
1106 | unsigned int chain_size; | ||
1107 | |||
1108 | chain_size = event->header.size; | ||
1109 | chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; | ||
1110 | |||
1111 | if (chain->nr*sizeof(u64) > chain_size) | ||
1112 | return -1; | ||
1113 | |||
1114 | return 0; | ||
1115 | } | ||
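
validate_chain() bounds the variable-length tail introduced by __more_data: after the fixed ip/pid/tid fields comes an optional u64 period (when PERF_SAMPLE_PERIOD is set), then the callchain as a u64 count followed by that many entries, where values at or above PERF_CONTEXT_MAX are not addresses but markers saying whether the following frames are kernel or user. The walk, as a sketch (it mirrors the loop in hist_entry__add; the PERF_CONTEXT_* constants come from the perf_counter ABI header):

/* Sketch: walk callchain entries, tracking the current context marker. */
static void walk_chain(struct ip_callchain *chain)
{
	u64 context = PERF_CONTEXT_MAX;
	u64 i;

	for (i = 0; i < chain->nr; i++) {
		u64 ip = chain->ips[i];

		if (ip >= PERF_CONTEXT_MAX) {	/* a marker, not an address */
			context = ip;
			continue;
		}
		/* ip is a kernel address iff context == PERF_CONTEXT_KERNEL */
	}
}
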
936 | 1116 | ||
937 | static int | 1117 | static int |
938 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | 1118 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) |
@@ -941,12 +1121,16 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
941 | int show = 0; | 1121 | int show = 0; |
942 | struct dso *dso = NULL; | 1122 | struct dso *dso = NULL; |
943 | struct thread *thread = threads__findnew(event->ip.pid); | 1123 | struct thread *thread = threads__findnew(event->ip.pid); |
944 | __u64 ip = event->ip.ip; | 1124 | u64 ip = event->ip.ip; |
945 | __u64 period = 1; | 1125 | u64 period = 1; |
946 | struct map *map = NULL; | 1126 | struct map *map = NULL; |
1127 | void *more_data = event->ip.__more_data; | ||
1128 | struct ip_callchain *chain = NULL; | ||
947 | 1129 | ||
948 | if (event->header.type & PERF_SAMPLE_PERIOD) | 1130 | if (event->header.type & PERF_SAMPLE_PERIOD) { |
949 | period = event->ip.period; | 1131 | period = *(u64 *)more_data; |
1132 | more_data += sizeof(u64); | ||
1133 | } | ||
950 | 1134 | ||
951 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", | 1135 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", |
952 | (void *)(offset + head), | 1136 | (void *)(offset + head), |
@@ -956,10 +1140,28 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
956 | (void *)(long)ip, | 1140 | (void *)(long)ip, |
957 | (long long)period); | 1141 | (long long)period); |
958 | 1142 | ||
1143 | if (event->header.type & PERF_SAMPLE_CALLCHAIN) { | ||
1144 | int i; | ||
1145 | |||
1146 | chain = (void *)more_data; | ||
1147 | |||
1148 | dprintf("... chain: nr:%Lu\n", chain->nr); | ||
1149 | |||
1150 | if (validate_chain(chain, event) < 0) { | ||
1151 | eprintf("call-chain problem with event, skipping it.\n"); | ||
1152 | return 0; | ||
1153 | } | ||
1154 | |||
1155 | if (dump_trace) { | ||
1156 | for (i = 0; i < chain->nr; i++) | ||
1157 | dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); | ||
1158 | } | ||
1159 | } | ||
1160 | |||
959 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); | 1161 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); |
960 | 1162 | ||
961 | if (thread == NULL) { | 1163 | if (thread == NULL) { |
962 | fprintf(stderr, "problem processing %d event, skipping it.\n", | 1164 | eprintf("problem processing %d event, skipping it.\n", |
963 | event->header.type); | 1165 | event->header.type); |
964 | return -1; | 1166 | return -1; |
965 | } | 1167 | } |
@@ -977,22 +1179,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
977 | show = SHOW_USER; | 1179 | show = SHOW_USER; |
978 | level = '.'; | 1180 | level = '.'; |
979 | 1181 | ||
980 | map = thread__find_map(thread, ip); | ||
981 | if (map != NULL) { | ||
982 | ip = map->map_ip(map, ip); | ||
983 | dso = map->dso; | ||
984 | } else { | ||
985 | /* | ||
986 | * If this is outside of all known maps, | ||
987 | * and is a negative address, try to look it | ||
988 | * up in the kernel dso, as it might be a | ||
989 | * vsyscall (which executes in user-mode): | ||
990 | */ | ||
991 | if ((long long)ip < 0) | ||
992 | dso = kernel_dso; | ||
993 | } | ||
994 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); | ||
995 | |||
996 | } else { | 1182 | } else { |
997 | show = SHOW_HV; | 1183 | show = SHOW_HV; |
998 | level = 'H'; | 1184 | level = 'H'; |
@@ -1000,14 +1186,10 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | |||
1000 | } | 1186 | } |
1001 | 1187 | ||
1002 | if (show & show_mask) { | 1188 | if (show & show_mask) { |
1003 | struct symbol *sym = NULL; | 1189 | struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); |
1004 | |||
1005 | if (dso) | ||
1006 | sym = dso->find_symbol(dso, ip); | ||
1007 | 1190 | ||
1008 | if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { | 1191 | if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { |
1009 | fprintf(stderr, | 1192 | eprintf("problem incrementing symbol count, skipping event\n"); |
1010 | "problem incrementing symbol count, skipping event\n"); | ||
1011 | return -1; | 1193 | return -1; |
1012 | } | 1194 | } |
1013 | } | 1195 | } |
@@ -1096,8 +1278,60 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) | |||
1096 | } | 1278 | } |
1097 | 1279 | ||
1098 | static int | 1280 | static int |
1281 | process_lost_event(event_t *event, unsigned long offset, unsigned long head) | ||
1282 | { | ||
1283 | dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", | ||
1284 | (void *)(offset + head), | ||
1285 | (void *)(long)(event->header.size), | ||
1286 | event->lost.id, | ||
1287 | event->lost.lost); | ||
1288 | |||
1289 | total_lost += event->lost.lost; | ||
1290 | |||
1291 | return 0; | ||
1292 | } | ||
1293 | |||
1294 | static void trace_event(event_t *event) | ||
1295 | { | ||
1296 | unsigned char *raw_event = (void *)event; | ||
1297 | char *color = PERF_COLOR_BLUE; | ||
1298 | int i, j; | ||
1299 | |||
1300 | if (!dump_trace) | ||
1301 | return; | ||
1302 | |||
1303 | dprintf("."); | ||
1304 | cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); | ||
1305 | |||
1306 | for (i = 0; i < event->header.size; i++) { | ||
1307 | if ((i & 15) == 0) { | ||
1308 | dprintf("."); | ||
1309 | cdprintf(" %04x: ", i); | ||
1310 | } | ||
1311 | |||
1312 | cdprintf(" %02x", raw_event[i]); | ||
1313 | |||
1314 | if (((i & 15) == 15) || i == event->header.size-1) { | ||
1315 | cdprintf(" "); | ||
1316 | for (j = 0; j < 15-(i & 15); j++) | ||
1317 | cdprintf(" "); | ||
1318 | for (j = 0; j < (i & 15); j++) { | ||
1319 | if (isprint(raw_event[i-15+j])) | ||
1320 | cdprintf("%c", raw_event[i-15+j]); | ||
1321 | else | ||
1322 | cdprintf("."); | ||
1323 | } | ||
1324 | cdprintf("\n"); | ||
1325 | } | ||
1326 | } | ||
1327 | dprintf(".\n"); | ||
1328 | } | ||
1329 | |||
1330 | static int | ||
1099 | process_event(event_t *event, unsigned long offset, unsigned long head) | 1331 | process_event(event_t *event, unsigned long offset, unsigned long head) |
1100 | { | 1332 | { |
1333 | trace_event(event); | ||
1334 | |||
1101 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | 1335 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) |
1102 | return process_overflow_event(event, offset, head); | 1336 | return process_overflow_event(event, offset, head); |
1103 | 1337 | ||
@@ -1113,6 +1347,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
1113 | 1347 | ||
1114 | case PERF_EVENT_PERIOD: | 1348 | case PERF_EVENT_PERIOD: |
1115 | return process_period_event(event, offset, head); | 1349 | return process_period_event(event, offset, head); |
1350 | |||
1351 | case PERF_EVENT_LOST: | ||
1352 | return process_lost_event(event, offset, head); | ||
1353 | |||
1116 | /* | 1354 | /* |
1117 | * We don't process them right now but they are fine: | 1355 | * We don't process them right now but they are fine: |
1118 | */ | 1356 | */ |
@@ -1128,11 +1366,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head) | |||
1128 | return 0; | 1366 | return 0; |
1129 | } | 1367 | } |
1130 | 1368 | ||
1369 | static struct perf_file_header file_header; | ||
1370 | |||
1131 | static int __cmd_report(void) | 1371 | static int __cmd_report(void) |
1132 | { | 1372 | { |
1133 | int ret, rc = EXIT_FAILURE; | 1373 | int ret, rc = EXIT_FAILURE; |
1134 | unsigned long offset = 0; | 1374 | unsigned long offset = 0; |
1135 | unsigned long head = 0; | 1375 | unsigned long head = sizeof(file_header); |
1136 | struct stat stat; | 1376 | struct stat stat; |
1137 | event_t *event; | 1377 | event_t *event; |
1138 | uint32_t size; | 1378 | uint32_t size; |
@@ -1160,6 +1400,17 @@ static int __cmd_report(void) | |||
1160 | exit(0); | 1400 | exit(0); |
1161 | } | 1401 | } |
1162 | 1402 | ||
1403 | if (read(input, &file_header, sizeof(file_header)) == -1) { | ||
1404 | perror("failed to read file headers"); | ||
1405 | exit(-1); | ||
1406 | } | ||
1407 | |||
1408 | if (sort__has_parent && | ||
1409 | !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { | ||
1410 | fprintf(stderr, "selected --sort parent, but no callchain data\n"); | ||
1411 | exit(-1); | ||
1412 | } | ||
1413 | |||
1163 | if (load_kernel() < 0) { | 1414 | if (load_kernel() < 0) { |
1164 | perror("failed to load kernel symbols"); | 1415 | perror("failed to load kernel symbols"); |
1165 | return EXIT_FAILURE; | 1416 | return EXIT_FAILURE; |
@@ -1204,7 +1455,7 @@ more: | |||
1204 | 1455 | ||
1205 | size = event->header.size; | 1456 | size = event->header.size; |
1206 | 1457 | ||
1207 | dprintf("%p [%p]: event: %d\n", | 1458 | dprintf("\n%p [%p]: event: %d\n", |
1208 | (void *)(offset + head), | 1459 | (void *)(offset + head), |
1209 | (void *)(long)event->header.size, | 1460 | (void *)(long)event->header.size, |
1210 | event->header.type); | 1461 | event->header.type); |
@@ -1231,9 +1482,13 @@ more: | |||
1231 | 1482 | ||
1232 | head += size; | 1483 | head += size; |
1233 | 1484 | ||
1485 | if (offset + head >= sizeof(file_header) + file_header.data_size) | ||
1486 | goto done; | ||
1487 | |||
1234 | if (offset + head < stat.st_size) | 1488 | if (offset + head < stat.st_size) |
1235 | goto more; | 1489 | goto more; |
1236 | 1490 | ||
1491 | done: | ||
1237 | rc = EXIT_SUCCESS; | 1492 | rc = EXIT_SUCCESS; |
1238 | close(input); | 1493 | close(input); |
1239 | 1494 | ||
@@ -1241,6 +1496,7 @@ more: | |||
1241 | dprintf(" mmap events: %10ld\n", total_mmap); | 1496 | dprintf(" mmap events: %10ld\n", total_mmap); |
1242 | dprintf(" comm events: %10ld\n", total_comm); | 1497 | dprintf(" comm events: %10ld\n", total_comm); |
1243 | dprintf(" fork events: %10ld\n", total_fork); | 1498 | dprintf(" fork events: %10ld\n", total_fork); |
1499 | dprintf(" lost events: %10ld\n", total_lost); | ||
1244 | dprintf(" unknown events: %10ld\n", total_unknown); | 1500 | dprintf(" unknown events: %10ld\n", total_unknown); |
1245 | 1501 | ||
1246 | if (dump_trace) | 1502 | if (dump_trace) |
@@ -1273,9 +1529,13 @@ static const struct option options[] = { | |||
1273 | "dump raw trace in ASCII"), | 1529 | "dump raw trace in ASCII"), |
1274 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), | 1530 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), |
1275 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | 1531 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", |
1276 | "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), | 1532 | "sort by key(s): pid, comm, dso, symbol, parent"), |
1277 | OPT_BOOLEAN('P', "full-paths", &full_paths, | 1533 | OPT_BOOLEAN('P', "full-paths", &full_paths, |
1278 | "Don't shorten the pathnames taking into account the cwd"), | 1534 | "Don't shorten the pathnames taking into account the cwd"), |
1535 | OPT_STRING('p', "parent", &parent_pattern, "regex", | ||
1536 | "regex filter to identify parent, see: '--sort parent'"), | ||
1537 | OPT_BOOLEAN('x', "exclude-other", &exclude_other, | ||
1538 | "Only display entries with parent-match"), | ||
1279 | OPT_END() | 1539 | OPT_END() |
1280 | }; | 1540 | }; |
1281 | 1541 | ||
@@ -1304,6 +1564,11 @@ int cmd_report(int argc, const char **argv, const char *prefix) | |||
1304 | 1564 | ||
1305 | setup_sorting(); | 1565 | setup_sorting(); |
1306 | 1566 | ||
1567 | if (parent_pattern != default_parent_pattern) | ||
1568 | sort_dimension__add("parent"); | ||
1569 | else | ||
1570 | exclude_other = 0; | ||
1571 | |||
1307 | /* | 1572 | /* |
1308 | * Any (unrecognized) arguments left? | 1573 | * Any (unrecognized) arguments left? |
1309 | */ | 1574 | */ |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a97dc42..6d3eeac1ea25 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -43,6 +43,7 @@ | |||
43 | #include "util/parse-events.h" | 43 | #include "util/parse-events.h" |
44 | 44 | ||
45 | #include <sys/prctl.h> | 45 | #include <sys/prctl.h> |
46 | #include <math.h> | ||
46 | 47 | ||
47 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | 48 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { |
48 | 49 | ||
@@ -79,12 +80,34 @@ static const unsigned int default_count[] = { | |||
79 | 10000, | 80 | 10000, |
80 | }; | 81 | }; |
81 | 82 | ||
82 | static __u64 event_res[MAX_COUNTERS][3]; | 83 | #define MAX_RUN 100 |
83 | static __u64 event_scaled[MAX_COUNTERS]; | ||
84 | 84 | ||
85 | static __u64 runtime_nsecs; | 85 | static int run_count = 1; |
86 | static __u64 walltime_nsecs; | 86 | static int run_idx = 0; |
87 | static __u64 runtime_cycles; | 87 | |
88 | static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; | ||
89 | static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; | ||
90 | |||
91 | //static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; | ||
92 | |||
93 | |||
94 | static u64 runtime_nsecs[MAX_RUN]; | ||
95 | static u64 walltime_nsecs[MAX_RUN]; | ||
96 | static u64 runtime_cycles[MAX_RUN]; | ||
97 | |||
98 | static u64 event_res_avg[MAX_COUNTERS][3]; | ||
99 | static u64 event_res_noise[MAX_COUNTERS][3]; | ||
100 | |||
101 | static u64 event_scaled_avg[MAX_COUNTERS]; | ||
102 | |||
103 | static u64 runtime_nsecs_avg; | ||
104 | static u64 runtime_nsecs_noise; | ||
105 | |||
106 | static u64 walltime_nsecs_avg; | ||
107 | static u64 walltime_nsecs_noise; | ||
108 | |||
109 | static u64 runtime_cycles_avg; | ||
110 | static u64 runtime_cycles_noise; | ||
88 | 111 | ||
89 | static void create_perf_stat_counter(int counter) | 112 | static void create_perf_stat_counter(int counter) |
90 | { | 113 | { |
@@ -135,12 +158,12 @@ static inline int nsec_counter(int counter) | |||
135 | */ | 158 | */ |
136 | static void read_counter(int counter) | 159 | static void read_counter(int counter) |
137 | { | 160 | { |
138 | __u64 *count, single_count[3]; | 161 | u64 *count, single_count[3]; |
139 | ssize_t res; | 162 | ssize_t res; |
140 | int cpu, nv; | 163 | int cpu, nv; |
141 | int scaled; | 164 | int scaled; |
142 | 165 | ||
143 | count = event_res[counter]; | 166 | count = event_res[run_idx][counter]; |
144 | 167 | ||
145 | count[0] = count[1] = count[2] = 0; | 168 | count[0] = count[1] = count[2] = 0; |
146 | 169 | ||
@@ -149,8 +172,10 @@ static void read_counter(int counter) | |||
149 | if (fd[cpu][counter] < 0) | 172 | if (fd[cpu][counter] < 0) |
150 | continue; | 173 | continue; |
151 | 174 | ||
152 | res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); | 175 | res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); |
153 | assert(res == nv * sizeof(__u64)); | 176 | assert(res == nv * sizeof(u64)); |
177 | close(fd[cpu][counter]); | ||
178 | fd[cpu][counter] = -1; | ||
154 | 179 | ||
155 | count[0] += single_count[0]; | 180 | count[0] += single_count[0]; |
156 | if (scale) { | 181 | if (scale) { |
@@ -162,13 +187,13 @@ static void read_counter(int counter) | |||
162 | scaled = 0; | 187 | scaled = 0; |
163 | if (scale) { | 188 | if (scale) { |
164 | if (count[2] == 0) { | 189 | if (count[2] == 0) { |
165 | event_scaled[counter] = -1; | 190 | event_scaled[run_idx][counter] = -1; |
166 | count[0] = 0; | 191 | count[0] = 0; |
167 | return; | 192 | return; |
168 | } | 193 | } |
169 | 194 | ||
170 | if (count[2] < count[1]) { | 195 | if (count[2] < count[1]) { |
171 | event_scaled[counter] = 1; | 196 | event_scaled[run_idx][counter] = 1; |
172 | count[0] = (unsigned long long) | 197 | count[0] = (unsigned long long) |
173 | ((double)count[0] * count[1] / count[2] + 0.5); | 198 | ((double)count[0] * count[1] / count[2] + 0.5); |
174 | } | 199 | } |
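
This branch compensates for counter multiplexing: with scaling enabled the kernel returns three u64s per read, the raw count plus the time the counter was enabled and the time it actually ran, so a counter that only held the PMU part of the time is extrapolated as count * enabled / running (the + 0.5 rounds to nearest). The arithmetic on its own, as a sketch:

/* Sketch: extrapolate a multiplexed count to full-time coverage. */
static unsigned long long scale_count(unsigned long long raw,
				      unsigned long long enabled,
				      unsigned long long running)
{
	if (!running)
		return 0;	/* counter never ran: nothing to extrapolate */

	/* e.g. raw=1000, enabled=10ms, running=5ms -> about 2000 */
	return (unsigned long long)((double)raw * enabled / running + 0.5);
}
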
@@ -178,10 +203,94 @@ static void read_counter(int counter) | |||
178 | */ | 203 | */ |
179 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | 204 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && |
180 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | 205 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) |
181 | runtime_nsecs = count[0]; | 206 | runtime_nsecs[run_idx] = count[0]; |
182 | if (attrs[counter].type == PERF_TYPE_HARDWARE && | 207 | if (attrs[counter].type == PERF_TYPE_HARDWARE && |
183 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) | 208 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) |
184 | runtime_cycles = count[0]; | 209 | runtime_cycles[run_idx] = count[0]; |
210 | } | ||
211 | |||
212 | static int run_perf_stat(int argc, const char **argv) | ||
213 | { | ||
214 | unsigned long long t0, t1; | ||
215 | int status = 0; | ||
216 | int counter; | ||
217 | int pid; | ||
218 | |||
219 | if (!system_wide) | ||
220 | nr_cpus = 1; | ||
221 | |||
222 | for (counter = 0; counter < nr_counters; counter++) | ||
223 | create_perf_stat_counter(counter); | ||
224 | |||
225 | /* | ||
226 | * Enable counters and exec the command: | ||
227 | */ | ||
228 | t0 = rdclock(); | ||
229 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
230 | |||
231 | if ((pid = fork()) < 0) | ||
232 | perror("failed to fork"); | ||
233 | |||
234 | if (!pid) { | ||
235 | if (execvp(argv[0], (char **)argv)) { | ||
236 | perror(argv[0]); | ||
237 | exit(-1); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | wait(&status); | ||
242 | |||
243 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
244 | t1 = rdclock(); | ||
245 | |||
246 | walltime_nsecs[run_idx] = t1 - t0; | ||
247 | |||
248 | for (counter = 0; counter < nr_counters; counter++) | ||
249 | read_counter(counter); | ||
250 | |||
251 | return WEXITSTATUS(status); | ||
252 | } | ||
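
run_perf_stat() brackets the workload between two rdclock() reads for wall time and enables the counters via prctl() only inside that window; the per-run results land in the [run_idx] slots so noise can be computed across repetitions. rdclock() itself is defined elsewhere in the tools; a plausible equivalent, assuming it wraps the monotonic clock in nanoseconds:

/* Sketch: nanosecond wall clock; monotonic, so it never steps backwards. */
#include <time.h>

static unsigned long long rdclock_sketch(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
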
253 | |||
254 | static void print_noise(u64 *count, u64 *noise) | ||
255 | { | ||
256 | if (run_count > 1) | ||
257 | fprintf(stderr, " ( +- %7.3f%% )", | ||
258 | (double)noise[0]/(count[0]+1)*100.0); | ||
259 | } | ||
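
print_noise() reports run-to-run variation as a percentage of the mean: by the time it runs, count[0] holds the per-counter average and noise[0] the summed deviations, scaled down by normalize_noise() below (division by run_count * sqrt(run_count) approximates the standard deviation of the mean); the +1 in the denominator merely avoids dividing by zero. Roughly, as arithmetic (a sketch of the intended statistics, not a quote of the code):

/* Sketch: mean and "+- noise%" over n repeated measurements. */
#include <math.h>

static double noise_percent(const unsigned long long *runs, int n)
{
	double sum = 0.0, dev = 0.0, avg;
	int i;

	for (i = 0; i < n; i++)
		sum += (double)runs[i];
	avg = sum / n;

	for (i = 0; i < n; i++)
		dev += fabs((double)runs[i] - avg);	/* summed |deviation| */

	/* scale like normalize_noise(), then express relative to the mean */
	return dev / (n * sqrt((double)n)) / (avg + 1.0) * 100.0;
}
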
260 | |||
261 | static void nsec_printout(int counter, u64 *count, u64 *noise) | ||
262 | { | ||
263 | double msecs = (double)count[0] / 1000000; | ||
264 | |||
265 | fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); | ||
266 | |||
267 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
268 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
269 | |||
270 | if (walltime_nsecs_avg) | ||
271 | fprintf(stderr, " # %10.3f CPUs ", | ||
272 | (double)count[0] / (double)walltime_nsecs_avg); | ||
273 | } | ||
274 | print_noise(count, noise); | ||
275 | } | ||
276 | |||
277 | static void abs_printout(int counter, u64 *count, u64 *noise) | ||
278 | { | ||
279 | fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); | ||
280 | |||
281 | if (runtime_cycles_avg && | ||
282 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
283 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
284 | |||
285 | fprintf(stderr, " # %10.3f IPC ", | ||
286 | (double)count[0] / (double)runtime_cycles_avg); | ||
287 | } else { | ||
288 | if (runtime_nsecs_avg) { | ||
289 | fprintf(stderr, " # %10.3f M/sec", | ||
290 | (double)count[0]/runtime_nsecs_avg*1000.0); | ||
291 | } | ||
292 | } | ||
293 | print_noise(count, noise); | ||
185 | } | 294 | } |
186 | 295 | ||
187 | /* | 296 | /* |
@@ -189,11 +298,12 @@ static void read_counter(int counter) | |||
189 | */ | 298 | */ |
190 | static void print_counter(int counter) | 299 | static void print_counter(int counter) |
191 | { | 300 | { |
192 | __u64 *count; | 301 | u64 *count, *noise; |
193 | int scaled; | 302 | int scaled; |
194 | 303 | ||
195 | count = event_res[counter]; | 304 | count = event_res_avg[counter]; |
196 | scaled = event_scaled[counter]; | 305 | noise = event_res_noise[counter]; |
306 | scaled = event_scaled_avg[counter]; | ||
197 | 307 | ||
198 | if (scaled == -1) { | 308 | if (scaled == -1) { |
199 | fprintf(stderr, " %14s %-20s\n", | 309 | fprintf(stderr, " %14s %-20s\n", |
@@ -201,75 +311,107 @@ static void print_counter(int counter) | |||
201 | return; | 311 | return; |
202 | } | 312 | } |
203 | 313 | ||
204 | if (nsec_counter(counter)) { | 314 | if (nsec_counter(counter)) |
205 | double msecs = (double)count[0] / 1000000; | 315 | nsec_printout(counter, count, noise); |
206 | 316 | else | |
207 | fprintf(stderr, " %14.6f %-20s", | 317 | abs_printout(counter, count, noise); |
208 | msecs, event_name(counter)); | ||
209 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
210 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
211 | 318 | ||
212 | if (walltime_nsecs) | ||
213 | fprintf(stderr, " # %11.3f CPU utilization factor", | ||
214 | (double)count[0] / (double)walltime_nsecs); | ||
215 | } | ||
216 | } else { | ||
217 | fprintf(stderr, " %14Ld %-20s", | ||
218 | count[0], event_name(counter)); | ||
219 | if (runtime_nsecs) | ||
220 | fprintf(stderr, " # %11.3f M/sec", | ||
221 | (double)count[0]/runtime_nsecs*1000.0); | ||
222 | if (runtime_cycles && | ||
223 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
224 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
225 | |||
226 | fprintf(stderr, " # %1.3f per cycle", | ||
227 | (double)count[0] / (double)runtime_cycles); | ||
228 | } | ||
229 | } | ||
230 | if (scaled) | 319 | if (scaled) |
231 | fprintf(stderr, " (scaled from %.2f%%)", | 320 | fprintf(stderr, " (scaled from %.2f%%)", |
232 | (double) count[2] / count[1] * 100); | 321 | (double) count[2] / count[1] * 100); |
322 | |||
233 | fprintf(stderr, "\n"); | 323 | fprintf(stderr, "\n"); |
234 | } | 324 | } |
235 | 325 | ||
236 | static int do_perf_stat(int argc, const char **argv) | 326 | /* |
327 | * Normalize the accumulated noise values down to stddev: | ||
328 | */ | ||
329 | static void normalize_noise(u64 *val) | ||
237 | { | 330 | { |
238 | unsigned long long t0, t1; | 331 | double res; |
239 | int counter; | ||
240 | int status; | ||
241 | int pid; | ||
242 | int i; | ||
243 | 332 | ||
244 | if (!system_wide) | 333 | res = (double)*val / (run_count * sqrt((double)run_count)); |
245 | nr_cpus = 1; | ||
246 | 334 | ||
247 | for (counter = 0; counter < nr_counters; counter++) | 335 | *val = (u64)res; |
248 | create_perf_stat_counter(counter); | 336 | } |
249 | 337 | ||
250 | /* | 338 | static void update_avg(const char *name, int idx, u64 *avg, u64 *val) |
251 | * Enable counters and exec the command: | 339 | { |
252 | */ | 340 | *avg += *val; |
253 | t0 = rdclock(); | ||
254 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
255 | 341 | ||
256 | if ((pid = fork()) < 0) | 342 | if (verbose > 1) |
257 | perror("failed to fork"); | 343 | fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); |
344 | } | ||
345 | /* | ||
346 | * Calculate the averages and noises: | ||
347 | */ | ||
348 | static void calc_avg(void) | ||
349 | { | ||
350 | int i, j; | ||
351 | |||
352 | if (verbose > 1) | ||
353 | fprintf(stderr, "\n"); | ||
354 | |||
355 | for (i = 0; i < run_count; i++) { | ||
356 | update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); | ||
357 | update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); | ||
358 | update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); | ||
359 | |||
360 | for (j = 0; j < nr_counters; j++) { | ||
361 | update_avg("counter/0", j, | ||
362 | event_res_avg[j]+0, event_res[i][j]+0); | ||
363 | update_avg("counter/1", j, | ||
364 | event_res_avg[j]+1, event_res[i][j]+1); | ||
365 | update_avg("counter/2", j, | ||
366 | event_res_avg[j]+2, event_res[i][j]+2); | ||
367 | update_avg("scaled", j, | ||
368 | event_scaled_avg + j, event_scaled[i]+j); | ||
369 | } | ||
370 | } | ||
371 | runtime_nsecs_avg /= run_count; | ||
372 | walltime_nsecs_avg /= run_count; | ||
373 | runtime_cycles_avg /= run_count; | ||
374 | |||
375 | for (j = 0; j < nr_counters; j++) { | ||
376 | event_res_avg[j][0] /= run_count; | ||
377 | event_res_avg[j][1] /= run_count; | ||
378 | event_res_avg[j][2] /= run_count; | ||
379 | } | ||
258 | 380 | ||
259 | if (!pid) { | 381 | for (i = 0; i < run_count; i++) { |
260 | if (execvp(argv[0], (char **)argv)) { | 382 | runtime_nsecs_noise += |
261 | perror(argv[0]); | 383 | abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); |
262 | exit(-1); | 384 | walltime_nsecs_noise += |
385 | abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); | ||
386 | runtime_cycles_noise += | ||
387 | abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); | ||
388 | |||
389 | for (j = 0; j < nr_counters; j++) { | ||
390 | event_res_noise[j][0] += | ||
391 | abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); | ||
392 | event_res_noise[j][1] += | ||
393 | abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); | ||
394 | event_res_noise[j][2] += | ||
395 | abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); | ||
263 | } | 396 | } |
264 | } | 397 | } |
265 | 398 | ||
266 | while (wait(&status) >= 0) | 399 | normalize_noise(&runtime_nsecs_noise); |
267 | ; | 400 | normalize_noise(&walltime_nsecs_noise); |
401 | normalize_noise(&runtime_cycles_noise); | ||
268 | 402 | ||
269 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | 403 | for (j = 0; j < nr_counters; j++) { |
270 | t1 = rdclock(); | 404 | normalize_noise(&event_res_noise[j][0]); |
405 | normalize_noise(&event_res_noise[j][1]); | ||
406 | normalize_noise(&event_res_noise[j][2]); | ||
407 | } | ||
408 | } | ||
271 | 409 | ||
272 | walltime_nsecs = t1 - t0; | 410 | static void print_stat(int argc, const char **argv) |
411 | { | ||
412 | int i, counter; | ||
413 | |||
414 | calc_avg(); | ||
273 | 415 | ||
274 | fflush(stdout); | 416 | fflush(stdout); |
275 | 417 | ||
@@ -279,22 +421,19 @@ static int do_perf_stat(int argc, const char **argv) | |||
279 | for (i = 1; i < argc; i++) | 421 | for (i = 1; i < argc; i++) |
280 | fprintf(stderr, " %s", argv[i]); | 422 | fprintf(stderr, " %s", argv[i]); |
281 | 423 | ||
282 | fprintf(stderr, "\':\n"); | 424 | fprintf(stderr, "\'"); |
283 | fprintf(stderr, "\n"); | 425 | if (run_count > 1) |
284 | 426 | fprintf(stderr, " (%d runs)", run_count); | |
285 | for (counter = 0; counter < nr_counters; counter++) | 427 | fprintf(stderr, ":\n\n"); |
286 | read_counter(counter); | ||
287 | 428 | ||
288 | for (counter = 0; counter < nr_counters; counter++) | 429 | for (counter = 0; counter < nr_counters; counter++) |
289 | print_counter(counter); | 430 | print_counter(counter); |
290 | 431 | ||
291 | 432 | ||
292 | fprintf(stderr, "\n"); | 433 | fprintf(stderr, "\n"); |
293 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | 434 | fprintf(stderr, " %14.9f seconds time elapsed.\n", |
294 | (double)(t1-t0)/1e6); | 435 | (double)walltime_nsecs_avg/1e9); |
295 | fprintf(stderr, "\n"); | 436 | fprintf(stderr, "\n"); |
296 | |||
297 | return 0; | ||
298 | } | 437 | } |
299 | 438 | ||
300 | static volatile int signr = -1; | 439 | static volatile int signr = -1; |
@@ -332,11 +471,15 @@ static const struct option options[] = { | |||
332 | "scale/normalize counters"), | 471 | "scale/normalize counters"), |
333 | OPT_BOOLEAN('v', "verbose", &verbose, | 472 | OPT_BOOLEAN('v', "verbose", &verbose, |
334 | "be more verbose (show counter open errors, etc)"), | 473 | "be more verbose (show counter open errors, etc)"), |
474 | OPT_INTEGER('r', "repeat", &run_count, | ||
475 | "repeat command and print average + stddev (max: 100)"), | ||
335 | OPT_END() | 476 | OPT_END() |
336 | }; | 477 | }; |
337 | 478 | ||
338 | int cmd_stat(int argc, const char **argv, const char *prefix) | 479 | int cmd_stat(int argc, const char **argv, const char *prefix) |
339 | { | 480 | { |
481 | int status; | ||
482 | |||
340 | page_size = sysconf(_SC_PAGE_SIZE); | 483 | page_size = sysconf(_SC_PAGE_SIZE); |
341 | 484 | ||
342 | memcpy(attrs, default_attrs, sizeof(attrs)); | 485 | memcpy(attrs, default_attrs, sizeof(attrs)); |
@@ -344,6 +487,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
344 | argc = parse_options(argc, argv, options, stat_usage, 0); | 487 | argc = parse_options(argc, argv, options, stat_usage, 0); |
345 | if (!argc) | 488 | if (!argc) |
346 | usage_with_options(stat_usage, options); | 489 | usage_with_options(stat_usage, options); |
490 | if (run_count <= 0 || run_count > MAX_RUN) | ||
491 | usage_with_options(stat_usage, options); | ||
347 | 492 | ||
348 | if (!nr_counters) | 493 | if (!nr_counters) |
349 | nr_counters = 8; | 494 | nr_counters = 8; |
@@ -363,5 +508,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) | |||
363 | signal(SIGALRM, skip_signal); | 508 | signal(SIGALRM, skip_signal); |
364 | signal(SIGABRT, skip_signal); | 509 | signal(SIGABRT, skip_signal); |
365 | 510 | ||
366 | return do_perf_stat(argc, argv); | 511 | status = 0; |
512 | for (run_idx = 0; run_idx < run_count; run_idx++) { | ||
513 | if (run_count != 1 && verbose) | ||
514 | fprintf(stderr, "[ perf stat: executing run #%d ... ]\n", run_idx+1); | ||
515 | status = run_perf_stat(argc, argv); | ||
516 | } | ||
517 | |||
518 | print_stat(argc, argv); | ||
519 | |||
520 | return status; | ||
367 | } | 521 | } |
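A note on the --repeat machinery in the builtin-stat.c hunks above: run_perf_stat() fills one result slot per run (run_idx), calc_avg() sums the slots and divides by run_count, and the *_noise fields accumulate absolute deviations from the average, which normalize_noise() scales by run_count * sqrt(run_count) so that print_noise() can report a stddev-style +- percentage. A minimal standalone sketch of that averaging/noise scheme, using made-up per-run values rather than real counter reads (compile with -lm):

#include <math.h>
#include <stdio.h>

typedef unsigned long long u64;

int main(void)
{
	u64 runs[] = { 1000000ULL, 1040000ULL, 980000ULL };	/* hypothetical per-run counts */
	int run_count = 3, i;
	u64 avg = 0, noise = 0;

	for (i = 0; i < run_count; i++)
		avg += runs[i];
	avg /= run_count;

	/* Sum of absolute deviations from the average: */
	for (i = 0; i < run_count; i++)
		noise += runs[i] > avg ? runs[i] - avg : avg - runs[i];

	/* As in normalize_noise(): scale the sum down to a stddev-like value. */
	noise = (u64)((double)noise / (run_count * sqrt((double)run_count)));

	/* As in print_noise(): relative noise; the +1 guards against a zero average. */
	printf("%llu ( +- %7.3f%% )\n", avg, (double)noise / (avg + 1) * 100.0);
	return 0;
}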
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index fe338d3c5d7e..5352b5e352ed 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -54,7 +54,7 @@ static int system_wide = 0; | |||
54 | 54 | ||
55 | static int default_interval = 100000; | 55 | static int default_interval = 100000; |
56 | 56 | ||
57 | static __u64 count_filter = 5; | 57 | static u64 count_filter = 5; |
58 | static int print_entries = 15; | 58 | static int print_entries = 15; |
59 | 59 | ||
60 | static int target_pid = -1; | 60 | static int target_pid = -1; |
@@ -79,8 +79,8 @@ static int dump_symtab; | |||
79 | * Symbols | 79 | * Symbols |
80 | */ | 80 | */ |
81 | 81 | ||
82 | static __u64 min_ip; | 82 | static u64 min_ip; |
83 | static __u64 max_ip = -1ll; | 83 | static u64 max_ip = -1ll; |
84 | 84 | ||
85 | struct sym_entry { | 85 | struct sym_entry { |
86 | struct rb_node rb_node; | 86 | struct rb_node rb_node; |
@@ -194,7 +194,7 @@ static void print_sym_table(void) | |||
194 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); | 194 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); |
195 | 195 | ||
196 | if (nr_counters == 1) { | 196 | if (nr_counters == 1) { |
197 | printf("%Ld", attrs[0].sample_period); | 197 | printf("%Ld", (u64)attrs[0].sample_period); |
198 | if (freq) | 198 | if (freq) |
199 | printf("Hz "); | 199 | printf("Hz "); |
200 | else | 200 | else |
@@ -372,7 +372,7 @@ out_delete_dso: | |||
372 | /* | 372 | /* |
373 | * Binary search in the histogram table and record the hit: | 373 | * Binary search in the histogram table and record the hit: |
374 | */ | 374 | */ |
375 | static void record_ip(__u64 ip, int counter) | 375 | static void record_ip(u64 ip, int counter) |
376 | { | 376 | { |
377 | struct symbol *sym = dso__find_symbol(kernel_dso, ip); | 377 | struct symbol *sym = dso__find_symbol(kernel_dso, ip); |
378 | 378 | ||
@@ -392,7 +392,7 @@ static void record_ip(__u64 ip, int counter) | |||
392 | samples--; | 392 | samples--; |
393 | } | 393 | } |
394 | 394 | ||
395 | static void process_event(__u64 ip, int counter) | 395 | static void process_event(u64 ip, int counter) |
396 | { | 396 | { |
397 | samples++; | 397 | samples++; |
398 | 398 | ||
@@ -463,15 +463,15 @@ static void mmap_read_counter(struct mmap_data *md) | |||
463 | for (; old != head;) { | 463 | for (; old != head;) { |
464 | struct ip_event { | 464 | struct ip_event { |
465 | struct perf_event_header header; | 465 | struct perf_event_header header; |
466 | __u64 ip; | 466 | u64 ip; |
467 | __u32 pid, target_pid; | 467 | u32 pid, target_pid; |
468 | }; | 468 | }; |
469 | struct mmap_event { | 469 | struct mmap_event { |
470 | struct perf_event_header header; | 470 | struct perf_event_header header; |
471 | __u32 pid, target_pid; | 471 | u32 pid, target_pid; |
472 | __u64 start; | 472 | u64 start; |
473 | __u64 len; | 473 | u64 len; |
474 | __u64 pgoff; | 474 | u64 pgoff; |
475 | char filename[PATH_MAX]; | 475 | char filename[PATH_MAX]; |
476 | }; | 476 | }; |
477 | 477 | ||
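The builtin-top.c changes are part of the same sweep from __u64/__u32 to the tools' own u64/u32. The two local structs in mmap_read_counter() spell out the record layouts top expects to pull from the mmap'ed ring buffer, keyed by the perf_event_header at the front of each record. A hedged sketch of that header-based dispatch; the record-type constant and handler below are hypothetical stand-ins, not the real PERF_EVENT_* values:

#include <stdio.h>

typedef unsigned long long u64;
typedef unsigned int u32;
typedef unsigned short u16;

struct perf_event_header { u32 type; u16 misc; u16 size; };

struct ip_event {
	struct perf_event_header header;
	u64 ip;
	u32 pid, target_pid;
};

enum { SAMPLE_RECORD = 1 };	/* stand-in for a real record-type value */

static void handle_record(struct perf_event_header *hdr)
{
	switch (hdr->type) {
	case SAMPLE_RECORD: {
		struct ip_event *e = (struct ip_event *)hdr;

		printf("sample: ip=%llx pid=%u\n", e->ip, e->pid);
		break;
	}
	default:
		/* Unknown records are skipped by their self-describing size. */
		printf("skipping %u bytes of record type %u\n",
		       (unsigned)hdr->size, hdr->type);
	}
}

int main(void)
{
	struct ip_event ev = { { SAMPLE_RECORD, 0, sizeof(ev) }, 0xc0100000ULL, 42, 42 };

	handle_record(&ev.header);
	return 0;
}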
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 87a1aca4a424..bccb529dac08 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <sys/syscall.h> | 19 | #include <sys/syscall.h> |
20 | 20 | ||
21 | #include "../../include/linux/perf_counter.h" | 21 | #include "../../include/linux/perf_counter.h" |
22 | #include "types.h" | ||
22 | 23 | ||
23 | /* | 24 | /* |
24 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | 25 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all |
@@ -65,4 +66,10 @@ sys_perf_counter_open(struct perf_counter_attr *attr, | |||
65 | #define MAX_COUNTERS 256 | 66 | #define MAX_COUNTERS 256 |
66 | #define MAX_NR_CPUS 256 | 67 | #define MAX_NR_CPUS 256 |
67 | 68 | ||
69 | struct perf_file_header { | ||
70 | u64 version; | ||
71 | u64 sample_type; | ||
72 | u64 data_size; | ||
73 | }; | ||
74 | |||
68 | #endif | 75 | #endif |
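The new struct perf_file_header gives the perf data file a fixed preamble: a format version, the sample_type the records were written with, and the payload size. A sketch of how a writer might lay that down ahead of the sample records; the file name and field values here are illustrative only, not taken from the perf source:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

typedef unsigned long long u64;

struct perf_file_header {
	u64 version;
	u64 sample_type;
	u64 data_size;
};

int main(void)
{
	struct perf_file_header hdr = {
		.version	= 1,	/* assumed initial version */
		.sample_type	= 0,	/* whatever the counters were opened with */
		.data_size	= 0,	/* fixed up once the records are written */
	};
	int fd = open("perf.data.example", O_CREAT | O_TRUNC | O_WRONLY, 0644);

	if (fd < 0 || write(fd, &hdr, sizeof(hdr)) != sizeof(hdr)) {
		perror("perf.data.example");
		return 1;
	}
	/* Sample records would follow; data_size rewritten via lseek() + write(). */
	close(fd);
	return 0;
}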
diff --git a/tools/perf/types.h b/tools/perf/types.h new file mode 100644 index 000000000000..5e75f9005940 --- /dev/null +++ b/tools/perf/types.h | |||
@@ -0,0 +1,17 @@ | |||
1 | #ifndef _PERF_TYPES_H | ||
2 | #define _PERF_TYPES_H | ||
3 | |||
4 | /* | ||
5 | * We define u64 as unsigned long long for every architecture | ||
6 | * so that we can print it with %Lx without getting warnings. | ||
7 | */ | ||
8 | typedef unsigned long long u64; | ||
9 | typedef signed long long s64; | ||
10 | typedef unsigned int u32; | ||
11 | typedef signed int s32; | ||
12 | typedef unsigned short u16; | ||
13 | typedef signed short s16; | ||
14 | typedef unsigned char u8; | ||
15 | typedef signed char s8; | ||
16 | |||
17 | #endif /* _PERF_TYPES_H */ | ||
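The comment in types.h is the whole story: on some 64-bit targets the kernel-exported __u64 is unsigned long, so the %Lx/%Ld printing used throughout these tools drew format warnings there. Pinning u64 to unsigned long long makes one length modifier valid on every architecture. A small contrast sketch; the stdint types appear only to illustrate the alternative this avoids:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

typedef unsigned long long u64;	/* as defined in tools/perf/types.h */

int main(void)
{
	uint64_t a = 0xdeadbeefULL;	/* may be unsigned long on LP64 targets */
	u64 b = 0xdeadbeefULL;		/* unsigned long long everywhere */

	printf("%" PRIx64 "\n", a);	/* stdint needs the PRIx64 dance */
	printf("%llx\n", b);		/* a fixed u64 can rely on one modifier */
	return 0;
}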
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c index b90ec004f29c..0b791bd346bc 100644 --- a/tools/perf/util/ctype.c +++ b/tools/perf/util/ctype.c | |||
@@ -11,16 +11,21 @@ enum { | |||
11 | D = GIT_DIGIT, | 11 | D = GIT_DIGIT, |
12 | G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ | 12 | G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ |
13 | R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ | 13 | R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ |
14 | P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */ | ||
15 | |||
16 | PS = GIT_SPACE | GIT_PRINT_EXTRA, | ||
14 | }; | 17 | }; |
15 | 18 | ||
16 | unsigned char sane_ctype[256] = { | 19 | unsigned char sane_ctype[256] = { |
20 | /* 0 1 2 3 4 5 6 7 8 9 A B C D E F */ | ||
21 | |||
17 | 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ | 22 | 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ |
18 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ | 23 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ |
19 | S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ | 24 | PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */ |
20 | D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ | 25 | D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */ |
21 | 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ | 26 | P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ |
22 | A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ | 27 | A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */ |
23 | 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ | 28 | P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ |
24 | A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ | 29 | A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ |
25 | /* Nothing in the 128.. range */ | 30 | /* Nothing in the 128.. range */ |
26 | }; | 31 | }; |
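The ctype.c update classifies the remaining printable ASCII characters as P (GIT_PRINT_EXTRA) and reclassifies space as PS, whitespace that is also printable. Combined with the GIT_PRINT mask (0x3E, every class bit except GIT_SPACE) added to util.h at the end of this series of hunks, the new isprint() becomes a single table lookup. A miniature sketch of that pattern with an abbreviated table, not the full 256 entries:

#include <stdio.h>

enum {
	SPACE = 0x01, DIGIT = 0x02, ALPHA = 0x04,
	GLOB  = 0x08, REGEX = 0x10, PRINT_EXTRA = 0x20,
};
#define PRINT (DIGIT | ALPHA | GLOB | REGEX | PRINT_EXTRA)	/* 0x3e */

static unsigned char ctype_tbl[256];

static int my_isprint(int c)
{
	return (ctype_tbl[(unsigned char)c] & PRINT) != 0;
}

int main(void)
{
	ctype_tbl['a']	= ALPHA;
	ctype_tbl['7']	= DIGIT;
	ctype_tbl[' ']	= SPACE | PRINT_EXTRA;	/* "PS": whitespace and printable */
	ctype_tbl['\t']	= SPACE;		/* whitespace only */

	printf("a:%d 7:%d ' ':%d tab:%d\n",
	       my_isprint('a'), my_isprint('7'),
	       my_isprint(' '), my_isprint('\t'));	/* -> 1 1 1 0 */
	return 0;
}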
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5a72586e1df0..35d04da38d6a 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -13,8 +13,8 @@ int nr_counters; | |||
13 | struct perf_counter_attr attrs[MAX_COUNTERS]; | 13 | struct perf_counter_attr attrs[MAX_COUNTERS]; |
14 | 14 | ||
15 | struct event_symbol { | 15 | struct event_symbol { |
16 | __u8 type; | 16 | u8 type; |
17 | __u64 config; | 17 | u64 config; |
18 | char *symbol; | 18 | char *symbol; |
19 | }; | 19 | }; |
20 | 20 | ||
@@ -63,8 +63,8 @@ static char *hw_event_names[] = { | |||
63 | }; | 63 | }; |
64 | 64 | ||
65 | static char *sw_event_names[] = { | 65 | static char *sw_event_names[] = { |
66 | "cpu-clock-ticks", | 66 | "cpu-clock-msecs", |
67 | "task-clock-ticks", | 67 | "task-clock-msecs", |
68 | "page-faults", | 68 | "page-faults", |
69 | "context-switches", | 69 | "context-switches", |
70 | "CPU-migrations", | 70 | "CPU-migrations", |
@@ -96,7 +96,7 @@ static char *hw_cache_result [][MAX_ALIASES] = { | |||
96 | 96 | ||
97 | char *event_name(int counter) | 97 | char *event_name(int counter) |
98 | { | 98 | { |
99 | __u64 config = attrs[counter].config; | 99 | u64 config = attrs[counter].config; |
100 | int type = attrs[counter].type; | 100 | int type = attrs[counter].type; |
101 | static char buf[32]; | 101 | static char buf[32]; |
102 | 102 | ||
@@ -112,7 +112,7 @@ char *event_name(int counter) | |||
112 | return "unknown-hardware"; | 112 | return "unknown-hardware"; |
113 | 113 | ||
114 | case PERF_TYPE_HW_CACHE: { | 114 | case PERF_TYPE_HW_CACHE: { |
115 | __u8 cache_type, cache_op, cache_result; | 115 | u8 cache_type, cache_op, cache_result; |
116 | static char name[100]; | 116 | static char name[100]; |
117 | 117 | ||
118 | cache_type = (config >> 0) & 0xff; | 118 | cache_type = (config >> 0) & 0xff; |
@@ -202,7 +202,7 @@ static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *a | |||
202 | */ | 202 | */ |
203 | static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) | 203 | static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) |
204 | { | 204 | { |
205 | __u64 config, id; | 205 | u64 config, id; |
206 | int type; | 206 | int type; |
207 | unsigned int i; | 207 | unsigned int i; |
208 | const char *sep, *pstr; | 208 | const char *sep, *pstr; |
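For PERF_TYPE_HW_CACHE events, event_name() above unpacks config a byte at a time: the cache type from bits 0-7 and, in the part of the function elided by the hunk, the op and result selectors from the next two bytes. A small pack/unpack sketch of that encoding, assuming the byte order just described:

#include <stdio.h>

typedef unsigned long long u64;
typedef unsigned char u8;

static u64 pack_cache_config(u8 type, u8 op, u8 result)
{
	return (u64)type | ((u64)op << 8) | ((u64)result << 16);
}

int main(void)
{
	u64 config = pack_cache_config(0 /* e.g. L1D */, 0 /* read */, 1 /* miss */);
	u8 cache_type   = (config >>  0) & 0xff;
	u8 cache_op     = (config >>  8) & 0xff;
	u8 cache_result = (config >> 16) & 0xff;

	printf("type=%u op=%u result=%u\n", cache_type, cache_op, cache_result);
	return 0;
}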
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index ec33c0c7f4e2..c93eca9a7be3 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c | |||
@@ -15,7 +15,7 @@ static int hex(char ch) | |||
15 | * While we find nice hex chars, build a long_val. | 15 | * While we find nice hex chars, build a long_val. |
16 | * Return number of chars processed. | 16 | * Return number of chars processed. |
17 | */ | 17 | */ |
18 | int hex2u64(const char *ptr, __u64 *long_val) | 18 | int hex2u64(const char *ptr, u64 *long_val) |
19 | { | 19 | { |
20 | const char *p = ptr; | 20 | const char *p = ptr; |
21 | *long_val = 0; | 21 | *long_val = 0; |
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h index 72812c1c9a7a..37b03255b425 100644 --- a/tools/perf/util/string.h +++ b/tools/perf/util/string.h | |||
@@ -1,8 +1,8 @@ | |||
1 | #ifndef _PERF_STRING_H_ | 1 | #ifndef _PERF_STRING_H_ |
2 | #define _PERF_STRING_H_ | 2 | #define _PERF_STRING_H_ |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include "../types.h" |
5 | 5 | ||
6 | int hex2u64(const char *ptr, __u64 *val); | 6 | int hex2u64(const char *ptr, u64 *val); |
7 | 7 | ||
8 | #endif | 8 | #endif |
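Only the signature of hex2u64() changes here, but its contract is worth spelling out: parse a leading run of hex digits into a u64 and return how many characters were consumed, which is how the kallsyms and perf-map loaders below split "<address> <type> <name>" lines. A re-implementation sketch of that contract, since the loop body falls outside the visible hunk:

#include <stdio.h>

typedef unsigned long long u64;

static int hex(char ch)
{
	if (ch >= '0' && ch <= '9') return ch - '0';
	if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
	return -1;
}

/* Parse leading hex digits into *long_val; return the number of chars used. */
static int hex2u64(const char *ptr, u64 *long_val)
{
	const char *p = ptr;

	*long_val = 0;
	while (*p) {
		const int hex_val = hex(*p);

		if (hex_val < 0)
			break;
		*long_val = (*long_val << 4) | hex_val;
		p++;
	}
	return p - ptr;
}

int main(void)
{
	u64 addr;
	int len = hex2u64("c0100000 T _stext", &addr);

	printf("consumed %d chars, addr=%llx\n", len, addr);	/* 8, c0100000 */
	return 0;
}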
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 49a55f813712..86e14375e74e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -9,9 +9,9 @@ | |||
9 | 9 | ||
10 | const char *sym_hist_filter; | 10 | const char *sym_hist_filter; |
11 | 11 | ||
12 | static struct symbol *symbol__new(__u64 start, __u64 len, | 12 | static struct symbol *symbol__new(u64 start, u64 len, |
13 | const char *name, unsigned int priv_size, | 13 | const char *name, unsigned int priv_size, |
14 | __u64 obj_start, int verbose) | 14 | u64 obj_start, int verbose) |
15 | { | 15 | { |
16 | size_t namelen = strlen(name) + 1; | 16 | size_t namelen = strlen(name) + 1; |
17 | struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); | 17 | struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); |
@@ -21,14 +21,14 @@ static struct symbol *symbol__new(__u64 start, __u64 len, | |||
21 | 21 | ||
22 | if (verbose >= 2) | 22 | if (verbose >= 2) |
23 | printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", | 23 | printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", |
24 | (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); | 24 | (u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); |
25 | 25 | ||
26 | self->obj_start= obj_start; | 26 | self->obj_start= obj_start; |
27 | self->hist = NULL; | 27 | self->hist = NULL; |
28 | self->hist_sum = 0; | 28 | self->hist_sum = 0; |
29 | 29 | ||
30 | if (sym_hist_filter && !strcmp(name, sym_hist_filter)) | 30 | if (sym_hist_filter && !strcmp(name, sym_hist_filter)) |
31 | self->hist = calloc(sizeof(__u64), len); | 31 | self->hist = calloc(sizeof(u64), len); |
32 | 32 | ||
33 | if (priv_size) { | 33 | if (priv_size) { |
34 | memset(self, 0, priv_size); | 34 | memset(self, 0, priv_size); |
@@ -89,7 +89,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) | |||
89 | { | 89 | { |
90 | struct rb_node **p = &self->syms.rb_node; | 90 | struct rb_node **p = &self->syms.rb_node; |
91 | struct rb_node *parent = NULL; | 91 | struct rb_node *parent = NULL; |
92 | const __u64 ip = sym->start; | 92 | const u64 ip = sym->start; |
93 | struct symbol *s; | 93 | struct symbol *s; |
94 | 94 | ||
95 | while (*p != NULL) { | 95 | while (*p != NULL) { |
@@ -104,7 +104,7 @@ static void dso__insert_symbol(struct dso *self, struct symbol *sym) | |||
104 | rb_insert_color(&sym->rb_node, &self->syms); | 104 | rb_insert_color(&sym->rb_node, &self->syms); |
105 | } | 105 | } |
106 | 106 | ||
107 | struct symbol *dso__find_symbol(struct dso *self, __u64 ip) | 107 | struct symbol *dso__find_symbol(struct dso *self, u64 ip) |
108 | { | 108 | { |
109 | struct rb_node *n; | 109 | struct rb_node *n; |
110 | 110 | ||
@@ -151,7 +151,7 @@ static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verb | |||
151 | goto out_failure; | 151 | goto out_failure; |
152 | 152 | ||
153 | while (!feof(file)) { | 153 | while (!feof(file)) { |
154 | __u64 start; | 154 | u64 start; |
155 | struct symbol *sym; | 155 | struct symbol *sym; |
156 | int line_len, len; | 156 | int line_len, len; |
157 | char symbol_type; | 157 | char symbol_type; |
@@ -232,7 +232,7 @@ static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verb | |||
232 | goto out_failure; | 232 | goto out_failure; |
233 | 233 | ||
234 | while (!feof(file)) { | 234 | while (!feof(file)) { |
235 | __u64 start, size; | 235 | u64 start, size; |
236 | struct symbol *sym; | 236 | struct symbol *sym; |
237 | int line_len, len; | 237 | int line_len, len; |
238 | 238 | ||
@@ -353,7 +353,7 @@ static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, | |||
353 | { | 353 | { |
354 | uint32_t nr_rel_entries, idx; | 354 | uint32_t nr_rel_entries, idx; |
355 | GElf_Sym sym; | 355 | GElf_Sym sym; |
356 | __u64 plt_offset; | 356 | u64 plt_offset; |
357 | GElf_Shdr shdr_plt; | 357 | GElf_Shdr shdr_plt; |
358 | struct symbol *f; | 358 | struct symbol *f; |
359 | GElf_Shdr shdr_rel_plt; | 359 | GElf_Shdr shdr_rel_plt; |
@@ -523,7 +523,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name, | |||
523 | 523 | ||
524 | elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { | 524 | elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { |
525 | struct symbol *f; | 525 | struct symbol *f; |
526 | __u64 obj_start; | 526 | u64 obj_start; |
527 | 527 | ||
528 | if (!elf_sym__is_function(&sym)) | 528 | if (!elf_sym__is_function(&sym)) |
529 | continue; | 529 | continue; |
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0d1292bd8270..ea332e56e458 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -2,16 +2,18 @@ | |||
2 | #define _PERF_SYMBOL_ 1 | 2 | #define _PERF_SYMBOL_ 1 |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include "../types.h" | ||
5 | #include "list.h" | 6 | #include "list.h" |
6 | #include "rbtree.h" | 7 | #include "rbtree.h" |
7 | 8 | ||
8 | struct symbol { | 9 | struct symbol { |
9 | struct rb_node rb_node; | 10 | struct rb_node rb_node; |
10 | __u64 start; | 11 | u64 start; |
11 | __u64 end; | 12 | u64 end; |
12 | __u64 obj_start; | 13 | u64 obj_start; |
13 | __u64 hist_sum; | 14 | u64 hist_sum; |
14 | __u64 *hist; | 15 | u64 *hist; |
16 | void *priv; | ||
15 | char name[0]; | 17 | char name[0]; |
16 | }; | 18 | }; |
17 | 19 | ||
@@ -19,7 +21,7 @@ struct dso { | |||
19 | struct list_head node; | 21 | struct list_head node; |
20 | struct rb_root syms; | 22 | struct rb_root syms; |
21 | unsigned int sym_priv_size; | 23 | unsigned int sym_priv_size; |
22 | struct symbol *(*find_symbol)(struct dso *, __u64 ip); | 24 | struct symbol *(*find_symbol)(struct dso *, u64 ip); |
23 | char name[0]; | 25 | char name[0]; |
24 | }; | 26 | }; |
25 | 27 | ||
@@ -35,7 +37,7 @@ static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) | |||
35 | return ((void *)sym) - self->sym_priv_size; | 37 | return ((void *)sym) - self->sym_priv_size; |
36 | } | 38 | } |
37 | 39 | ||
38 | struct symbol *dso__find_symbol(struct dso *self, __u64 ip); | 40 | struct symbol *dso__find_symbol(struct dso *self, u64 ip); |
39 | 41 | ||
40 | int dso__load_kernel(struct dso *self, const char *vmlinux, | 42 | int dso__load_kernel(struct dso *self, const char *vmlinux, |
41 | symbol_filter_t filter, int verbose); | 43 | symbol_filter_t filter, int verbose); |
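struct symbol carries an inclusive [start, end] range plus an rb_node, and struct dso resolves an instruction pointer through its find_symbol hook; the insertion half of that rb-tree is visible in the symbol.c hunks above. The lookup half descends the tree comparing ip against each node's range. A sketch of that descent on a plain binary search tree, standing in for the kernel-style rbtree the real code uses:

#include <stdio.h>

typedef unsigned long long u64;

struct symbol {
	struct symbol *left, *right;	/* stand-in for struct rb_node */
	u64 start, end;
	const char *name;
};

/* Descend by range, as dso__find_symbol() does over its rbtree. */
static struct symbol *find_symbol(struct symbol *n, u64 ip)
{
	while (n) {
		if (ip < n->start)
			n = n->left;
		else if (ip > n->end)
			n = n->right;
		else
			return n;	/* start <= ip <= end */
	}
	return NULL;
}

int main(void)
{
	struct symbol b = { NULL, NULL, 0x2000, 0x20ff, "bar" };
	struct symbol a = { NULL, &b,   0x1000, 0x10ff, "foo" };
	struct symbol *s = find_symbol(&a, 0x2010);

	printf("%s\n", s ? s->name : "<unknown>");	/* -> bar */
	return 0;
}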
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 76590a16c271..b8cfed776d81 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -100,11 +100,6 @@ | |||
100 | #include <iconv.h> | 100 | #include <iconv.h> |
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | #ifndef NO_OPENSSL | ||
104 | #include <openssl/ssl.h> | ||
105 | #include <openssl/err.h> | ||
106 | #endif | ||
107 | |||
108 | /* On most systems <limits.h> would have given us this, but | 103 | /* On most systems <limits.h> would have given us this, but |
109 | * not on some systems (e.g. GNU/Hurd). | 104 | * not on some systems (e.g. GNU/Hurd). |
110 | */ | 105 | */ |
@@ -332,17 +327,20 @@ static inline int has_extension(const char *filename, const char *ext) | |||
332 | #undef tolower | 327 | #undef tolower |
333 | #undef toupper | 328 | #undef toupper |
334 | extern unsigned char sane_ctype[256]; | 329 | extern unsigned char sane_ctype[256]; |
335 | #define GIT_SPACE 0x01 | 330 | #define GIT_SPACE 0x01 |
336 | #define GIT_DIGIT 0x02 | 331 | #define GIT_DIGIT 0x02 |
337 | #define GIT_ALPHA 0x04 | 332 | #define GIT_ALPHA 0x04 |
338 | #define GIT_GLOB_SPECIAL 0x08 | 333 | #define GIT_GLOB_SPECIAL 0x08 |
339 | #define GIT_REGEX_SPECIAL 0x10 | 334 | #define GIT_REGEX_SPECIAL 0x10 |
335 | #define GIT_PRINT_EXTRA 0x20 | ||
336 | #define GIT_PRINT 0x3E | ||
340 | #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) | 337 | #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) |
341 | #define isascii(x) (((x) & ~0x7f) == 0) | 338 | #define isascii(x) (((x) & ~0x7f) == 0) |
342 | #define isspace(x) sane_istest(x,GIT_SPACE) | 339 | #define isspace(x) sane_istest(x,GIT_SPACE) |
343 | #define isdigit(x) sane_istest(x,GIT_DIGIT) | 340 | #define isdigit(x) sane_istest(x,GIT_DIGIT) |
344 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | 341 | #define isalpha(x) sane_istest(x,GIT_ALPHA) |
345 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | 342 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) |
343 | #define isprint(x) sane_istest(x,GIT_PRINT) | ||
346 | #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) | 344 | #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) |
347 | #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) | 345 | #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) |
348 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | 346 | #define tolower(x) sane_case((unsigned char)(x), 0x20) |