diff options
178 files changed, 29785 insertions, 323 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 72d3bf08d79b..7bcdebffdab3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -1083,6 +1083,10 @@ and is between 256 and 4096 characters. It is defined in the file | |||
1083 | Configure the RouterBoard 532 series on-chip | 1083 | Configure the RouterBoard 532 series on-chip |
1084 | Ethernet adapter MAC address. | 1084 | Ethernet adapter MAC address. |
1085 | 1085 | ||
1086 | kmemleak= [KNL] Boot-time kmemleak enable/disable | ||
1087 | Valid arguments: on, off | ||
1088 | Default: on | ||
1089 | |||
1086 | kstack=N [X86] Print N words from the kernel stack | 1090 | kstack=N [X86] Print N words from the kernel stack |
1087 | in oops dumps. | 1091 | in oops dumps. |
1088 | 1092 | ||
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt new file mode 100644 index 000000000000..0112da3b9ab8 --- /dev/null +++ b/Documentation/kmemleak.txt | |||
@@ -0,0 +1,142 @@ | |||
1 | Kernel Memory Leak Detector | ||
2 | =========================== | ||
3 | |||
4 | Introduction | ||
5 | ------------ | ||
6 | |||
7 | Kmemleak provides a way of detecting possible kernel memory leaks in a | ||
8 | way similar to a tracing garbage collector | ||
9 | (http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors), | ||
10 | with the difference that the orphan objects are not freed but only | ||
11 | reported via /sys/kernel/debug/kmemleak. A similar method is used by the | ||
12 | Valgrind tool (memcheck --leak-check) to detect the memory leaks in | ||
13 | user-space applications. | ||
14 | |||
15 | Usage | ||
16 | ----- | ||
17 | |||
18 | CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel | ||
19 | thread scans the memory every 10 minutes (by default) and prints any new | ||
20 | unreferenced objects found. To trigger an intermediate scan and display | ||
21 | all the possible memory leaks: | ||
22 | |||
23 | # mount -t debugfs nodev /sys/kernel/debug/ | ||
24 | # cat /sys/kernel/debug/kmemleak | ||
25 | |||
26 | Note that the orphan objects are listed in the order they were allocated | ||
27 | and one object at the beginning of the list may cause other subsequent | ||
28 | objects to be reported as orphan. | ||
29 | |||
30 | Memory scanning parameters can be modified at run-time by writing to the | ||
31 | /sys/kernel/debug/kmemleak file. The following parameters are supported: | ||
32 | |||
33 | off - disable kmemleak (irreversible) | ||
34 | stack=on - enable scanning of the task stacks | ||
35 | stack=off - disable scanning of the task stacks | ||
36 | scan=on - start the automatic memory scanning thread | ||
37 | scan=off - stop the automatic memory scanning thread | ||
38 | scan=<secs> - set the automatic memory scanning period in seconds (0 | ||
39 | to disable it) | ||
40 | |||
41 | Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on | ||
42 | the kernel command line. | ||
43 | |||
44 | Basic Algorithm | ||
45 | --------------- | ||
46 | |||
47 | The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and | ||
48 | friends are traced and the pointers, together with additional | ||
49 | information like size and stack trace, are stored in a prio search tree. | ||
50 | The corresponding freeing function calls are tracked and the pointers | ||
51 | removed from the kmemleak data structures. | ||
52 | |||
53 | An allocated block of memory is considered orphan if no pointer to its | ||
54 | start address or to any location inside the block can be found by | ||
55 | scanning the memory (including saved registers). This means that there | ||
56 | might be no way for the kernel to pass the address of the allocated | ||
57 | block to a freeing function and therefore the block is considered a | ||
58 | memory leak. | ||
59 | |||
60 | The scanning algorithm steps: | ||
61 | |||
62 | 1. mark all objects as white (remaining white objects will later be | ||
63 | considered orphan) | ||
64 | 2. scan the memory starting with the data section and stacks, checking | ||
65 | the values against the addresses stored in the prio search tree. If | ||
66 | a pointer to a white object is found, the object is added to the | ||
67 | gray list | ||
68 | 3. scan the gray objects for matching addresses (some white objects | ||
69 | can become gray and added at the end of the gray list) until the | ||
70 | gray set is finished | ||
71 | 4. the remaining white objects are considered orphan and reported via | ||
72 | /sys/kernel/debug/kmemleak | ||
73 | |||
74 | Some allocated memory blocks have pointers stored in the kernel's | ||
75 | internal data structures and they cannot be detected as orphans. To | ||
76 | avoid this, kmemleak can also store the number of values pointing to an | ||
77 | address inside the block address range that need to be found so that the | ||
78 | block is not considered a leak. One example is __vmalloc(). | ||
79 | |||
80 | Kmemleak API | ||
81 | ------------ | ||
82 | |||
83 | See the include/linux/kmemleak.h header for the function prototypes. | ||
84 | |||
85 | kmemleak_init - initialize kmemleak | ||
86 | kmemleak_alloc - notify of a memory block allocation | ||
87 | kmemleak_free - notify of a memory block freeing | ||
88 | kmemleak_not_leak - mark an object as not a leak | ||
89 | kmemleak_ignore - do not scan or report an object as leak | ||
90 | kmemleak_scan_area - add scan areas inside a memory block | ||
91 | kmemleak_no_scan - do not scan a memory block | ||
92 | kmemleak_erase - erase an old value in a pointer variable | ||
93 | kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness | ||
94 | kmemleak_free_recursive - as kmemleak_free but checks the recursiveness | ||
95 | |||
96 | Dealing with false positives/negatives | ||
97 | -------------------------------------- | ||
98 | |||
99 | The false negatives are real memory leaks (orphan objects) but not | ||
100 | reported by kmemleak because values found during the memory scanning | ||
101 | point to such objects. To reduce the number of false negatives, kmemleak | ||
102 | provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and | ||
103 | kmemleak_erase functions (see above). The task stacks also increase the | ||
104 | number of false negatives and their scanning is not enabled by default. | ||
105 | |||
106 | The false positives are objects wrongly reported as being memory leaks | ||
107 | (orphan). For objects known not to be leaks, kmemleak provides the | ||
108 | kmemleak_not_leak function. The kmemleak_ignore could also be used if | ||
109 | the memory block is known not to contain other pointers and it will no | ||
110 | longer be scanned. | ||
111 | |||
112 | Some of the reported leaks are only transient, especially on SMP | ||
113 | systems, because of pointers temporarily stored in CPU registers or | ||
114 | stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing | ||
115 | the minimum age of an object to be reported as a memory leak. | ||
116 | |||
117 | Limitations and Drawbacks | ||
118 | ------------------------- | ||
119 | |||
120 | The main drawback is the reduced performance of memory allocation and | ||
121 | freeing. To avoid other penalties, the memory scanning is only performed | ||
122 | periodically by the kernel thread (every 10 minutes by default) or when the | ||
123 | /sys/kernel/debug/kmemleak file is read. In any case, this tool is intended | ||
124 | for debugging purposes where performance might not be the most important requirement. | ||
125 | |||
126 | To keep the algorithm simple, kmemleak scans for values pointing to any | ||
127 | address inside a block's address range. This may lead to an increased | ||
128 | number of false negatives. However, it is likely that a real memory leak | ||
129 | will eventually become visible. | ||
130 | |||
131 | Another source of false negatives is the data stored in non-pointer | ||
132 | values. In a future version, kmemleak could scan only the pointer | ||
133 | members in the allocated structures. This feature would solve many of | ||
134 | the false negative cases described above. | ||
135 | |||
136 | The tool can report false positives. These are cases where an allocated | ||
137 | block doesn't need to be freed (some cases in the init_call functions), | ||
138 | the pointer is calculated by other methods than the usual container_of | ||
139 | macro or the pointer is stored in a location not scanned by kmemleak. | ||
140 | |||
141 | Page allocations and ioremap are not tracked. Only the ARM and x86 | ||
142 | architectures are currently supported. | ||
diff --git a/MAINTAINERS b/MAINTAINERS index e697b67031a2..c944d618dc83 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -3372,6 +3372,12 @@ F: Documentation/trace/kmemtrace.txt | |||
3372 | F: include/trace/kmemtrace.h | 3372 | F: include/trace/kmemtrace.h |
3373 | F: kernel/trace/kmemtrace.c | 3373 | F: kernel/trace/kmemtrace.c |
3374 | 3374 | ||
3375 | KMEMLEAK | ||
3376 | P: Catalin Marinas | ||
3377 | M: catalin.marinas@arm.com | ||
3378 | L: linux-kernel@vger.kernel.org | ||
3379 | S: Maintained | ||
3380 | |||
3375 | KPROBES | 3381 | KPROBES |
3376 | P: Ananth N Mavinakayanahalli | 3382 | P: Ananth N Mavinakayanahalli |
3377 | M: ananth@in.ibm.com | 3383 | M: ananth@in.ibm.com |
@@ -4405,6 +4411,16 @@ S: Maintained | |||
4405 | F: include/linux/delayacct.h | 4411 | F: include/linux/delayacct.h |
4406 | F: kernel/delayacct.c | 4412 | F: kernel/delayacct.c |
4407 | 4413 | ||
4414 | PERFORMANCE COUNTER SUBSYSTEM | ||
4415 | P: Peter Zijlstra | ||
4416 | M: a.p.zijlstra@chello.nl | ||
4417 | P: Paul Mackerras | ||
4418 | M: paulus@samba.org | ||
4419 | P: Ingo Molnar | ||
4420 | M: mingo@elte.hu | ||
4421 | L: linux-kernel@vger.kernel.org | ||
4422 | S: Supported | ||
4423 | |||
4408 | PERSONALITY HANDLING | 4424 | PERSONALITY HANDLING |
4409 | P: Christoph Hellwig | 4425 | P: Christoph Hellwig |
4410 | M: hch@infradead.org | 4426 | M: hch@infradead.org |
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index b7e034b0a6dd..20a44d0c9fdd 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h | |||
@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(unsigned long flags) | |||
131 | */ | 131 | */ |
132 | struct irq_chip; | 132 | struct irq_chip; |
133 | 133 | ||
134 | #ifdef CONFIG_PERF_COUNTERS | ||
135 | static inline unsigned long test_perf_counter_pending(void) /* returns the perf_counter_pending byte of struct paca_struct; non-zero means pending work has been flagged */ | ||
136 | { | ||
137 | unsigned long x; | ||
138 | |||
139 | asm volatile("lbz %0,%1(13)" /* byte load from r13 + the field's offset */ | ||
140 | : "=r" (x) | ||
141 | : "i" (offsetof(struct paca_struct, perf_counter_pending))); /* offset is passed as an immediate operand */ | ||
142 | return x; | ||
143 | } | ||
144 | |||
145 | static inline void set_perf_counter_pending(void) /* flag pending work: stores 1 into the perf_counter_pending byte of struct paca_struct */ | ||
146 | { | ||
147 | asm volatile("stb %0,%1(13)" : : /* byte store to r13 + the field's offset; no output operands */ | ||
148 | "r" (1), | ||
149 | "i" (offsetof(struct paca_struct, perf_counter_pending))); | ||
150 | } | ||
151 | |||
152 | static inline void clear_perf_counter_pending(void) /* drop the pending flag: stores 0 into the perf_counter_pending byte of struct paca_struct */ | ||
153 | { | ||
154 | asm volatile("stb %0,%1(13)" : : /* byte store to r13 + the field's offset; no output operands */ | ||
155 | "r" (0), | ||
156 | "i" (offsetof(struct paca_struct, perf_counter_pending))); | ||
157 | } | ||
158 | |||
159 | extern void perf_counter_do_pending(void); | ||
160 | |||
161 | #else | ||
162 | |||
163 | static inline unsigned long test_perf_counter_pending(void) /* !CONFIG_PERF_COUNTERS stub: nothing is ever pending */ | ||
164 | { | ||
165 | return 0; | ||
166 | } | ||
167 | |||
168 | static inline void set_perf_counter_pending(void) {} /* !CONFIG_PERF_COUNTERS stub: no-op */ | ||
169 | static inline void clear_perf_counter_pending(void) {} /* !CONFIG_PERF_COUNTERS stub: no-op */ | ||
170 | static inline void perf_counter_do_pending(void) {} /* !CONFIG_PERF_COUNTERS stub: no-op, so callers need no #ifdef */ | ||
171 | #endif /* CONFIG_PERF_COUNTERS */ | ||
172 | |||
134 | #endif /* __KERNEL__ */ | 173 | #endif /* __KERNEL__ */ |
135 | #endif /* _ASM_POWERPC_HW_IRQ_H */ | 174 | #endif /* _ASM_POWERPC_HW_IRQ_H */ |
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 082b3aedf145..6ef055723019 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h | |||
@@ -99,6 +99,7 @@ struct paca_struct { | |||
99 | u8 soft_enabled; /* irq soft-enable flag */ | 99 | u8 soft_enabled; /* irq soft-enable flag */ |
100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ | 100 | u8 hard_enabled; /* set if irqs are enabled in MSR */ |
101 | u8 io_sync; /* writel() needs spin_unlock sync */ | 101 | u8 io_sync; /* writel() needs spin_unlock sync */ |
102 | u8 perf_counter_pending; /* PM interrupt while soft-disabled */ | ||
102 | 103 | ||
103 | /* Stuff for accurate time accounting */ | 104 | /* Stuff for accurate time accounting */ |
104 | u64 user_time; /* accumulated usermode TB ticks */ | 105 | u64 user_time; /* accumulated usermode TB ticks */ |
diff --git a/arch/powerpc/include/asm/perf_counter.h b/arch/powerpc/include/asm/perf_counter.h new file mode 100644 index 000000000000..cc7c887705b8 --- /dev/null +++ b/arch/powerpc/include/asm/perf_counter.h | |||
@@ -0,0 +1,98 @@ | |||
1 | /* | ||
2 | * Performance counter support - PowerPC-specific definitions. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/types.h> | ||
12 | |||
13 | #define MAX_HWCOUNTERS 8 | ||
14 | #define MAX_EVENT_ALTERNATIVES 8 | ||
15 | #define MAX_LIMITED_HWCOUNTERS 2 | ||
16 | |||
17 | /* | ||
18 | * This struct provides the constants and functions needed to | ||
19 | * describe the PMU on a particular POWER-family CPU. The constraint-related members (add_fields, test_adder, get_constraint) are explained in the long comment at the end of this header. | ||
20 | */ | ||
21 | struct power_pmu { | ||
22 | int n_counter; /* upper bound on the number of events that can be scheduled at once */ | ||
23 | int max_alternatives; | ||
24 | u64 add_fields; /* has a 1 in the LSB of every "add" constraint field */ | ||
25 | u64 test_adder; /* per-field constant added when testing constraints */ | ||
26 | int (*compute_mmcr)(u64 events[], int n_ev, | ||
27 | unsigned int hwc[], u64 mmcr[]); | ||
28 | int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); /* fills in the 64-bit constraint mask and value for an event; non-zero return is treated as failure by callers */ | ||
29 | int (*get_alternatives)(u64 event, unsigned int flags, | ||
30 | u64 alt[]); /* flags takes the PPMU_LIMITED_PMC_* / PPMU_ONLY_COUNT_RUN values below; return value is used as the number of entries written to alt[] */ | ||
31 | void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); | ||
32 | int (*limited_pmc_event)(u64 event); /* used as a predicate: non-zero if the event code is for a limited PMC */ | ||
33 | u32 flags; /* PPMU_LIMITED_PMC5_6 / PPMU_ALT_SIPR, defined below */ | ||
34 | int n_generic; /* presumably the number of entries in generic_events - TODO confirm */ | ||
35 | int *generic_events; | ||
36 | int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] | ||
37 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
38 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; /* pointer to a 3-D table indexed by cache, operation and result */ | ||
39 | }; | ||
40 | |||
41 | extern struct power_pmu *ppmu; | ||
42 | |||
43 | /* | ||
44 | * Values for power_pmu.flags | ||
45 | */ | ||
46 | #define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ | ||
47 | #define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ | ||
48 | |||
49 | /* | ||
50 | * Values for flags to get_alternatives() | ||
51 | */ | ||
52 | #define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ | ||
53 | #define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ | ||
54 | #define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ | ||
55 | |||
56 | struct pt_regs; | ||
57 | extern unsigned long perf_misc_flags(struct pt_regs *regs); | ||
58 | #define perf_misc_flags(regs) perf_misc_flags(regs) | ||
59 | |||
60 | extern unsigned long perf_instruction_pointer(struct pt_regs *regs); | ||
61 | |||
62 | /* | ||
63 | * The power_pmu.get_constraint function returns a 64-bit value and | ||
64 | * a 64-bit mask that express the constraints between this event and | ||
65 | * other events. | ||
66 | * | ||
67 | * The value and mask are divided up into (non-overlapping) bitfields | ||
68 | * of three different types: | ||
69 | * | ||
70 | * Select field: this expresses the constraint that some set of bits | ||
71 | * in MMCR* needs to be set to a specific value for this event. For a | ||
72 | * select field, the mask contains 1s in every bit of the field, and | ||
73 | * the value contains a unique value for each possible setting of the | ||
74 | * MMCR* bits. The constraint checking code will ensure that two events | ||
75 | * that set the same field in their masks have the same value in their | ||
76 | * value dwords. | ||
77 | * | ||
78 | * Add field: this expresses the constraint that there can be at most | ||
79 | * N events in a particular class. A field of k bits can be used for | ||
80 | * N <= 2^(k-1) - 1. The mask has the most significant bit of the field | ||
81 | * set (and the other bits 0), and the value has only the least significant | ||
82 | * bit of the field set. In addition, the 'add_fields' and 'test_adder' | ||
83 | * in the struct power_pmu for this processor come into play. The | ||
84 | * add_fields value contains 1 in the LSB of the field, and the | ||
85 | * test_adder contains 2^(k-1) - 1 - N in the field. | ||
86 | * | ||
87 | * NAND field: this expresses the constraint that you may not have events | ||
88 | * in all of a set of classes. (For example, on PPC970, you can't select | ||
89 | * events from the FPU, ISU and IDU simultaneously, although any two are | ||
90 | * possible.) For N classes, the field is N+1 bits wide, and each class | ||
91 | * is assigned one bit from the least-significant N bits. The mask has | ||
92 | * only the most-significant bit set, and the value has only the bit | ||
93 | * for the event's class set. The test_adder has the least significant | ||
94 | * bit set in the field. | ||
95 | * | ||
96 | * If an event is not subject to the constraint expressed by a particular | ||
97 | * field, then it will have 0 in both the mask and value for that field. | ||
98 | */ | ||
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index e8018d540e87..fb359b0a6937 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h | |||
@@ -492,11 +492,13 @@ | |||
492 | #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ | 492 | #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ |
493 | #define SPRN_MMCR1 798 | 493 | #define SPRN_MMCR1 798 |
494 | #define SPRN_MMCRA 0x312 | 494 | #define SPRN_MMCRA 0x312 |
495 | #define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ | ||
495 | #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ | 496 | #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ |
496 | #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ | 497 | #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ |
497 | #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ | 498 | #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ |
498 | #define MMCRA_SLOT_SHIFT 24 | 499 | #define MMCRA_SLOT_SHIFT 24 |
499 | #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ | 500 | #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ |
501 | #define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ | ||
500 | #define POWER6_MMCRA_SIHV 0x0000040000000000ULL | 502 | #define POWER6_MMCRA_SIHV 0x0000040000000000ULL |
501 | #define POWER6_MMCRA_SIPR 0x0000020000000000ULL | 503 | #define POWER6_MMCRA_SIPR 0x0000020000000000ULL |
502 | #define POWER6_MMCRA_THRM 0x00000020UL | 504 | #define POWER6_MMCRA_THRM 0x00000020UL |
diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index d98a30dfd41c..a0b92de51c7e 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h | |||
@@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1) | |||
322 | SYSCALL_SPU(dup3) | 322 | SYSCALL_SPU(dup3) |
323 | SYSCALL_SPU(pipe2) | 323 | SYSCALL_SPU(pipe2) |
324 | SYSCALL(inotify_init1) | 324 | SYSCALL(inotify_init1) |
325 | SYSCALL(ni_syscall) | 325 | SYSCALL_SPU(perf_counter_open) |
326 | COMPAT_SYS_SPU(preadv) | 326 | COMPAT_SYS_SPU(preadv) |
327 | COMPAT_SYS_SPU(pwritev) | 327 | COMPAT_SYS_SPU(pwritev) |
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 3f06f8ec81c5..4badac2d11d1 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h | |||
@@ -341,6 +341,7 @@ | |||
341 | #define __NR_dup3 316 | 341 | #define __NR_dup3 316 |
342 | #define __NR_pipe2 317 | 342 | #define __NR_pipe2 317 |
343 | #define __NR_inotify_init1 318 | 343 | #define __NR_inotify_init1 318 |
344 | #define __NR_perf_counter_open 319 | ||
344 | #define __NR_preadv 320 | 345 | #define __NR_preadv 320 |
345 | #define __NR_pwritev 321 | 346 | #define __NR_pwritev 321 |
346 | 347 | ||
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 71901fbda4a5..a2c683403c2b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -94,6 +94,9 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o | |||
94 | 94 | ||
95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 95 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | 96 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o |
97 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ | ||
98 | power5-pmu.o power5+-pmu.o power6-pmu.o \ | ||
99 | power7-pmu.o | ||
97 | 100 | ||
98 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o | 101 | obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o |
99 | 102 | ||
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 1e40bc053946..e981d1ce1914 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c | |||
@@ -131,6 +131,7 @@ int main(void) | |||
131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); | 131 | DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); |
132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); | 132 | DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); |
133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); | 133 | DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); |
134 | DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); | ||
134 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); | 135 | DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); |
135 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); | 136 | DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); |
136 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); | 137 | DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); |
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index abfc32330479..43e073477c34 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S | |||
@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) | |||
526 | 2: | 526 | 2: |
527 | TRACE_AND_RESTORE_IRQ(r5); | 527 | TRACE_AND_RESTORE_IRQ(r5); |
528 | 528 | ||
529 | #ifdef CONFIG_PERF_COUNTERS | ||
530 | /* check paca->perf_counter_pending if we're enabling ints */ | ||
531 | lbz r3,PACAPERFPEND(r13) | ||
532 | and. r3,r3,r5 | ||
533 | beq 27f | ||
534 | bl .perf_counter_do_pending | ||
535 | 27: | ||
536 | #endif /* CONFIG_PERF_COUNTERS */ | ||
537 | |||
529 | /* extract EE bit and use it to restore paca->hard_enabled */ | 538 | /* extract EE bit and use it to restore paca->hard_enabled */ |
530 | ld r3,_MSR(r1) | 539 | ld r3,_MSR(r1) |
530 | rldicl r4,r3,49,63 /* r4 = (r3 >> 15) & 1 */ | 539 | rldicl r4,r3,49,63 /* r4 = (r3 >> 15) & 1 */ |
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 8c1a4966867e..feff792ed0f9 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c | |||
@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsigned long en) | |||
135 | iseries_handle_interrupts(); | 135 | iseries_handle_interrupts(); |
136 | } | 136 | } |
137 | 137 | ||
138 | if (test_perf_counter_pending()) { | ||
139 | clear_perf_counter_pending(); | ||
140 | perf_counter_do_pending(); | ||
141 | } | ||
142 | |||
138 | /* | 143 | /* |
139 | * if (get_paca()->hard_enabled) return; | 144 | * if (get_paca()->hard_enabled) return; |
140 | * But again we need to take care that gcc gets hard_enabled directly | 145 | * But again we need to take care that gcc gets hard_enabled directly |
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c new file mode 100644 index 000000000000..bb202388170e --- /dev/null +++ b/arch/powerpc/kernel/perf_counter.c | |||
@@ -0,0 +1,1263 @@ | |||
1 | /* | ||
2 | * Performance counter support - powerpc architecture code | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/percpu.h> | ||
15 | #include <linux/hardirq.h> | ||
16 | #include <asm/reg.h> | ||
17 | #include <asm/pmc.h> | ||
18 | #include <asm/machdep.h> | ||
19 | #include <asm/firmware.h> | ||
20 | #include <asm/ptrace.h> | ||
21 | |||
22 | struct cpu_hw_counters { /* counter bookkeeping; one instance per CPU is defined right after this struct via DEFINE_PER_CPU */ | ||
23 | int n_counters; | ||
24 | int n_percpu; | ||
25 | int disabled; | ||
26 | int n_added; | ||
27 | int n_limited; /* presumably the number of valid entries in limited_counter[] / limited_hwidx[] - TODO confirm */ | ||
28 | u8 pmcs_enabled; | ||
29 | struct perf_counter *counter[MAX_HWCOUNTERS]; /* counter[], events[] and flags[] are all sized MAX_HWCOUNTERS; presumably parallel arrays - TODO confirm */ | ||
30 | u64 events[MAX_HWCOUNTERS]; | ||
31 | unsigned int flags[MAX_HWCOUNTERS]; | ||
32 | u64 mmcr[3]; /* NOTE(review): looks like one slot each for MMCR0, MMCR1 and MMCRA - confirm against users */ | ||
33 | struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; | ||
34 | u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; | ||
35 | }; | ||
36 | DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); | ||
37 | |||
38 | struct power_pmu *ppmu; | ||
39 | |||
40 | /* | ||
41 | * Normally, to ignore kernel events we set the FCS (freeze counters | ||
42 | * in supervisor mode) bit in MMCR0, but if the kernel runs with the | ||
43 | * hypervisor bit set in the MSR, or if we are running on a processor | ||
44 | * where the hypervisor bit is forced to 1 (as on Apple G5 processors), | ||
45 | * then we need to use the FCHV bit to ignore kernel events. | ||
46 | */ | ||
47 | static unsigned int freeze_counters_kernel = MMCR0_FCS; | ||
48 | |||
49 | static void perf_counter_interrupt(struct pt_regs *regs); | ||
50 | |||
51 | void perf_counter_print_debug(void) /* empty body: this implementation prints nothing */ | ||
52 | { | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | * Read one performance monitor counter (PMC). idx is 1-based: 1..8 select SPRN_PMC1..SPRN_PMC8 and the SPR value is returned. Any other idx logs an error with printk and returns 0. | ||
57 | */ | ||
58 | static unsigned long read_pmc(int idx) | ||
59 | { | ||
60 | unsigned long val; | ||
61 | |||
62 | switch (idx) { /* explicit case per counter: each mfspr is written with a constant SPR name */ | ||
63 | case 1: | ||
64 | val = mfspr(SPRN_PMC1); | ||
65 | break; | ||
66 | case 2: | ||
67 | val = mfspr(SPRN_PMC2); | ||
68 | break; | ||
69 | case 3: | ||
70 | val = mfspr(SPRN_PMC3); | ||
71 | break; | ||
72 | case 4: | ||
73 | val = mfspr(SPRN_PMC4); | ||
74 | break; | ||
75 | case 5: | ||
76 | val = mfspr(SPRN_PMC5); | ||
77 | break; | ||
78 | case 6: | ||
79 | val = mfspr(SPRN_PMC6); | ||
80 | break; | ||
81 | case 7: | ||
82 | val = mfspr(SPRN_PMC7); | ||
83 | break; | ||
84 | case 8: | ||
85 | val = mfspr(SPRN_PMC8); | ||
86 | break; | ||
87 | default: | ||
88 | printk(KERN_ERR "oops trying to read PMC%d\n", idx); | ||
89 | val = 0; /* out-of-range index reads as zero */ | ||
90 | } | ||
91 | return val; | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * Write one PMC. idx is 1-based: 1..8 select SPRN_PMC1..SPRN_PMC8 and val is written to that SPR. Any other idx logs an error with printk and writes nothing. | ||
96 | */ | ||
97 | static void write_pmc(int idx, unsigned long val) | ||
98 | { | ||
99 | switch (idx) { /* explicit case per counter: each mtspr is written with a constant SPR name */ | ||
100 | case 1: | ||
101 | mtspr(SPRN_PMC1, val); | ||
102 | break; | ||
103 | case 2: | ||
104 | mtspr(SPRN_PMC2, val); | ||
105 | break; | ||
106 | case 3: | ||
107 | mtspr(SPRN_PMC3, val); | ||
108 | break; | ||
109 | case 4: | ||
110 | mtspr(SPRN_PMC4, val); | ||
111 | break; | ||
112 | case 5: | ||
113 | mtspr(SPRN_PMC5, val); | ||
114 | break; | ||
115 | case 6: | ||
116 | mtspr(SPRN_PMC6, val); | ||
117 | break; | ||
118 | case 7: | ||
119 | mtspr(SPRN_PMC7, val); | ||
120 | break; | ||
121 | case 8: | ||
122 | mtspr(SPRN_PMC8, val); | ||
123 | break; | ||
124 | default: | ||
125 | printk(KERN_ERR "oops trying to write PMC%d\n", idx); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * Check if a set of events can all go on the PMU at once. | ||
131 | * If they can't, this will look at alternative codes for the events | ||
132 | * and see if any combination of alternative codes is feasible. | ||
133 | * The feasible set is returned in event[]. Returns 0 on success; returns -1 if n_ev exceeds ppmu->n_counter, if get_constraint rejects an event, if alternatives are needed but ppmu has no get_alternatives, or if no combination of alternatives is feasible. | ||
134 | */ | ||
135 | static int power_check_constraints(u64 event[], unsigned int cflags[], | ||
136 | int n_ev) | ||
137 | { | ||
138 | u64 mask, value, nv; | ||
139 | u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
140 | u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
141 | u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; | ||
142 | u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; /* value/mask saved per event so backtracking can restore them */ | ||
143 | int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; | ||
144 | int i, j; | ||
145 | u64 addf = ppmu->add_fields; | ||
146 | u64 tadd = ppmu->test_adder; | ||
147 | |||
148 | if (n_ev > ppmu->n_counter) | ||
149 | return -1; | ||
150 | |||
151 | /* First see if the events will go on as-is */ | ||
152 | for (i = 0; i < n_ev; ++i) { | ||
153 | if ((cflags[i] & PPMU_LIMITED_PMC_REQD) | ||
154 | && !ppmu->limited_pmc_event(event[i])) { | ||
155 | ppmu->get_alternatives(event[i], cflags[i], /* NOTE(review): called without the NULL check applied further down, and the returned count is ignored before alternatives[i][0] is used - presumably PPMU_LIMITED_PMC_REQD is only set for PMUs that provide both callbacks; confirm */ | ||
156 | alternatives[i]); | ||
157 | event[i] = alternatives[i][0]; /* replace the event with the first alternative returned */ | ||
158 | } | ||
159 | if (ppmu->get_constraint(event[i], &amasks[i][0], | ||
160 | &avalues[i][0])) | ||
161 | return -1; | ||
162 | } | ||
163 | value = mask = 0; | ||
164 | for (i = 0; i < n_ev; ++i) { | ||
165 | nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); /* merge this event into the running value: for bits in addf this is true addition ((a|b)+(a&b) == a+b), elsewhere a plain OR */ | ||
166 | if ((((nv + tadd) ^ value) & mask) != 0 || /* conflict if adding test_adder changes a bit under the accumulated mask ... */ | ||
167 | (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) /* ... or a bit under this event's own mask */ | ||
168 | break; | ||
169 | value = nv; | ||
170 | mask |= amasks[i][0]; | ||
171 | } | ||
172 | if (i == n_ev) | ||
173 | return 0; /* all OK */ | ||
174 | |||
175 | /* doesn't work, gather alternatives... */ | ||
176 | if (!ppmu->get_alternatives) | ||
177 | return -1; | ||
178 | for (i = 0; i < n_ev; ++i) { | ||
179 | choice[i] = 0; | ||
180 | n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], | ||
181 | alternatives[i]); | ||
182 | for (j = 1; j < n_alt[i]; ++j) /* index 0 keeps the mask/value computed in the first pass above */ | ||
183 | ppmu->get_constraint(alternatives[i][j], /* NOTE(review): return value is ignored here, unlike in the first pass */ | ||
184 | &amasks[i][j], &avalues[i][j]); | ||
185 | } | ||
186 | |||
187 | /* enumerate all possibilities and see if any will work */ | ||
188 | i = 0; | ||
189 | j = -1; | ||
190 | value = mask = nv = 0; | ||
191 | while (i < n_ev) { | ||
192 | if (j >= 0) { | ||
193 | /* we're backtracking, restore context */ | ||
194 | value = svalues[i]; | ||
195 | mask = smasks[i]; | ||
196 | j = choice[i]; | ||
197 | } | ||
198 | /* | ||
199 | * See if any alternative k for event i, | ||
200 | * where k > j, will satisfy the constraints. | ||
201 | */ | ||
202 | while (++j < n_alt[i]) { | ||
203 | nv = (value | avalues[i][j]) + | ||
204 | (value & avalues[i][j] & addf); | ||
205 | if ((((nv + tadd) ^ value) & mask) == 0 && /* same test as the first pass, inverted: stop at the first alternative that fits */ | ||
206 | (((nv + tadd) ^ avalues[i][j]) | ||
207 | & amasks[i][j]) == 0) | ||
208 | break; | ||
209 | } | ||
210 | if (j >= n_alt[i]) { | ||
211 | /* | ||
212 | * No feasible alternative, backtrack | ||
213 | * to event i-1 and continue enumerating its | ||
214 | * alternatives from where we got up to. | ||
215 | */ | ||
216 | if (--i < 0) /* j is left >= 0 here, so the next iteration takes the restore-context branch above */ | ||
217 | return -1; | ||
218 | } else { | ||
219 | /* | ||
220 | * Found a feasible alternative for event i, | ||
221 | * remember where we got up to with this event, | ||
222 | * go on to the next event, and start with | ||
223 | * the first alternative for it. | ||
224 | */ | ||
225 | choice[i] = j; | ||
226 | svalues[i] = value; | ||
227 | smasks[i] = mask; | ||
228 | value = nv; | ||
229 | mask |= amasks[i][j]; | ||
230 | ++i; | ||
231 | j = -1; | ||
232 | } | ||
233 | } | ||
234 | |||
235 | /* OK, we have a feasible combination, tell the caller the solution */ | ||
236 | for (i = 0; i < n_ev; ++i) | ||
237 | event[i] = alternatives[i][choice[i]]; | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * Check if newly-added counters have consistent settings for | ||
243 | * exclude_{user,kernel,hv} with each other and any previously | ||
244 | * added counters. | ||
245 | */ | ||
246 | static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], | ||
247 | int n_prev, int n_new) | ||
248 | { | ||
249 | int eu = 0, ek = 0, eh = 0; | ||
250 | int i, n, first; | ||
251 | struct perf_counter *counter; | ||
252 | |||
253 | n = n_prev + n_new; | ||
254 | if (n <= 1) | ||
255 | return 0; | ||
256 | |||
257 | first = 1; | ||
258 | for (i = 0; i < n; ++i) { | ||
259 | if (cflags[i] & PPMU_LIMITED_PMC_OK) { | ||
260 | cflags[i] &= ~PPMU_LIMITED_PMC_REQD; | ||
261 | continue; | ||
262 | } | ||
263 | counter = ctrs[i]; | ||
264 | if (first) { | ||
265 | eu = counter->attr.exclude_user; | ||
266 | ek = counter->attr.exclude_kernel; | ||
267 | eh = counter->attr.exclude_hv; | ||
268 | first = 0; | ||
269 | } else if (counter->attr.exclude_user != eu || | ||
270 | counter->attr.exclude_kernel != ek || | ||
271 | counter->attr.exclude_hv != eh) { | ||
272 | return -EAGAIN; | ||
273 | } | ||
274 | } | ||
275 | |||
276 | if (eu || ek || eh) | ||
277 | for (i = 0; i < n; ++i) | ||
278 | if (cflags[i] & PPMU_LIMITED_PMC_OK) | ||
279 | cflags[i] |= PPMU_LIMITED_PMC_REQD; | ||
280 | |||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static void power_pmu_read(struct perf_counter *counter) | ||
285 | { | ||
286 | long val, delta, prev; | ||
287 | |||
288 | if (!counter->hw.idx) | ||
289 | return; | ||
290 | /* | ||
291 | * Performance monitor interrupts come even when interrupts | ||
292 | * are soft-disabled, as long as interrupts are hard-enabled. | ||
293 | * Therefore we treat them like NMIs. | ||
294 | */ | ||
295 | do { | ||
296 | prev = atomic64_read(&counter->hw.prev_count); | ||
297 | barrier(); | ||
298 | val = read_pmc(counter->hw.idx); | ||
299 | } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); | ||
300 | |||
301 | /* The counters are only 32 bits wide */ | ||
302 | delta = (val - prev) & 0xfffffffful; | ||
303 | atomic64_add(delta, &counter->count); | ||
304 | atomic64_sub(delta, &counter->hw.period_left); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * On some machines, PMC5 and PMC6 can't be written, don't respect | ||
309 | * the freeze conditions, and don't generate interrupts. This tells | ||
310 | * us if `counter' is using such a PMC. | ||
311 | */ | ||
312 | static int is_limited_pmc(int pmcnum) | ||
313 | { | ||
314 | return (ppmu->flags & PPMU_LIMITED_PMC5_6) | ||
315 | && (pmcnum == 5 || pmcnum == 6); | ||
316 | } | ||
317 | |||
318 | static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, | ||
319 | unsigned long pmc5, unsigned long pmc6) | ||
320 | { | ||
321 | struct perf_counter *counter; | ||
322 | u64 val, prev, delta; | ||
323 | int i; | ||
324 | |||
325 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
326 | counter = cpuhw->limited_counter[i]; | ||
327 | if (!counter->hw.idx) | ||
328 | continue; | ||
329 | val = (counter->hw.idx == 5) ? pmc5 : pmc6; | ||
330 | prev = atomic64_read(&counter->hw.prev_count); | ||
331 | counter->hw.idx = 0; | ||
332 | delta = (val - prev) & 0xfffffffful; | ||
333 | atomic64_add(delta, &counter->count); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, | ||
338 | unsigned long pmc5, unsigned long pmc6) | ||
339 | { | ||
340 | struct perf_counter *counter; | ||
341 | u64 val; | ||
342 | int i; | ||
343 | |||
344 | for (i = 0; i < cpuhw->n_limited; ++i) { | ||
345 | counter = cpuhw->limited_counter[i]; | ||
346 | counter->hw.idx = cpuhw->limited_hwidx[i]; | ||
347 | val = (counter->hw.idx == 5) ? pmc5 : pmc6; | ||
348 | atomic64_set(&counter->hw.prev_count, val); | ||
349 | perf_counter_update_userpage(counter); | ||
350 | } | ||
351 | } | ||
352 | |||
/*
 * Write `mmcr0' to the MMCR0 register, taking care of any limited
 * counters recorded in `cpuhw'.
 *
 * Since limited counters don't respect the freeze conditions, we
 * have to read them immediately after freezing or unfreezing the
 * other counters.  We try to keep the values from the limited
 * counters as consistent as possible by keeping the delay (in
 * cycles and instructions) between freezing/unfreezing and reading
 * the limited counters as small and consistent as possible.
 * Therefore, if any limited counters are in use, we read them
 * both, and always in the same order, to minimize variability,
 * and do it inside the same asm that writes MMCR0.
 */
static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0)
{
	unsigned long pmc5, pmc6;

	/* No limited counters in use: a plain register write is enough. */
	if (!cpuhw->n_limited) {
		mtspr(SPRN_MMCR0, mmcr0);
		return;
	}

	/*
	 * Write MMCR0, then read PMC5 and PMC6 immediately.
	 * To ensure we don't get a performance monitor interrupt
	 * between writing MMCR0 and freezing/thawing the limited
	 * counters, we first write MMCR0 with the counter overflow
	 * interrupt enable bits turned off.
	 */
	asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5"
		     : "=&r" (pmc5), "=&r" (pmc6)
		     : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)),
		       "i" (SPRN_MMCR0),
		       "i" (SPRN_PMC5), "i" (SPRN_PMC6));

	/* MMCR0_FC set in the new value means the counters are being frozen. */
	if (mmcr0 & MMCR0_FC)
		freeze_limited_counters(cpuhw, pmc5, pmc6);
	else
		thaw_limited_counters(cpuhw, pmc5, pmc6);

	/*
	 * Write the full MMCR0 including the counter overflow interrupt
	 * enable bits, if necessary.
	 */
	if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE))
		mtspr(SPRN_MMCR0, mmcr0);
}
398 | |||
399 | /* | ||
400 | * Disable all counters to prevent PMU interrupts and to allow | ||
401 | * counters to be added or removed. | ||
402 | */ | ||
403 | void hw_perf_disable(void) | ||
404 | { | ||
405 | struct cpu_hw_counters *cpuhw; | ||
406 | unsigned long ret; | ||
407 | unsigned long flags; | ||
408 | |||
409 | local_irq_save(flags); | ||
410 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
411 | |||
412 | ret = cpuhw->disabled; | ||
413 | if (!ret) { | ||
414 | cpuhw->disabled = 1; | ||
415 | cpuhw->n_added = 0; | ||
416 | |||
417 | /* | ||
418 | * Check if we ever enabled the PMU on this cpu. | ||
419 | */ | ||
420 | if (!cpuhw->pmcs_enabled) { | ||
421 | if (ppc_md.enable_pmcs) | ||
422 | ppc_md.enable_pmcs(); | ||
423 | cpuhw->pmcs_enabled = 1; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Disable instruction sampling if it was enabled | ||
428 | */ | ||
429 | if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { | ||
430 | mtspr(SPRN_MMCRA, | ||
431 | cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); | ||
432 | mb(); | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * Set the 'freeze counters' bit. | ||
437 | * The barrier is to make sure the mtspr has been | ||
438 | * executed and the PMU has frozen the counters | ||
439 | * before we return. | ||
440 | */ | ||
441 | write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); | ||
442 | mb(); | ||
443 | } | ||
444 | local_irq_restore(flags); | ||
445 | } | ||
446 | |||
/*
 * Re-enable the PMU on this CPU if it is currently marked disabled
 * (cpuhw->disabled); otherwise do nothing.
 * If we were previously disabled and counters were added, then
 * put the new config on the PMU.
 */
void hw_perf_enable(void)
{
	struct perf_counter *counter;
	struct cpu_hw_counters *cpuhw;
	unsigned long flags;
	long i;
	unsigned long val;
	s64 left;
	unsigned int hwc_index[MAX_HWCOUNTERS];
	int n_lim;
	int idx;

	local_irq_save(flags);
	cpuhw = &__get_cpu_var(cpu_hw_counters);
	if (!cpuhw->disabled) {
		local_irq_restore(flags);
		return;
	}
	cpuhw->disabled = 0;

	/*
	 * If we didn't change anything, or only removed counters,
	 * no need to recalculate MMCR* settings and reset the PMCs.
	 * Just reenable the PMU with the current MMCR* settings
	 * (possibly updated for removal of counters).
	 */
	if (!cpuhw->n_added) {
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
		if (cpuhw->n_counters == 0)
			get_lppaca()->pmcregs_in_use = 0;
		goto out_enable;
	}

	/*
	 * Compute MMCR* values for the new set of counters
	 */
	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index,
			       cpuhw->mmcr)) {
		/* shouldn't ever get here */
		printk(KERN_ERR "oops compute_mmcr failed\n");
		/* Note: cpuhw->disabled was already cleared above. */
		goto out;
	}

	/*
	 * Add in MMCR0 freeze bits corresponding to the
	 * attr.exclude_* bits for the first counter.
	 * We have already checked that all counters have the
	 * same values for these bits as the first counter.
	 */
	counter = cpuhw->counter[0];
	if (counter->attr.exclude_user)
		cpuhw->mmcr[0] |= MMCR0_FCP;
	if (counter->attr.exclude_kernel)
		cpuhw->mmcr[0] |= freeze_counters_kernel;
	if (counter->attr.exclude_hv)
		cpuhw->mmcr[0] |= MMCR0_FCHV;

	/*
	 * Write the new configuration to MMCR* with the freeze
	 * bit set and set the hardware counters to their initial values.
	 * Then unfreeze the counters.
	 */
	get_lppaca()->pmcregs_in_use = 1;
	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
				| MMCR0_FC);

	/*
	 * Read off any pre-existing counters that need to move
	 * to another PMC.  hw.idx is the PMC number (hwc_index + 1),
	 * with 0 meaning that no PMC is assigned.
	 */
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) {
			power_pmu_read(counter);
			write_pmc(counter->hw.idx, 0);
			counter->hw.idx = 0;
		}
	}

	/*
	 * Initialize the PMCs for all the new and moved counters.
	 */
	cpuhw->n_limited = n_lim = 0;
	for (i = 0; i < cpuhw->n_counters; ++i) {
		counter = cpuhw->counter[i];
		/* Already on the right PMC: leave it alone. */
		if (counter->hw.idx)
			continue;
		idx = hwc_index[i] + 1;
		if (is_limited_pmc(idx)) {
			/*
			 * Limited PMCs are not written here; just record
			 * the counter and its PMC number in the
			 * limited_counter/limited_hwidx arrays.
			 */
			cpuhw->limited_counter[n_lim] = counter;
			cpuhw->limited_hwidx[n_lim] = idx;
			++n_lim;
			continue;
		}
		val = 0;
		if (counter->hw.sample_period) {
			/*
			 * Start the PMC so that it reaches 0x80000000
			 * after `left' more counts.
			 */
			left = atomic64_read(&counter->hw.period_left);
			if (left < 0x80000000L)
				val = 0x80000000L - left;
		}
		atomic64_set(&counter->hw.prev_count, val);
		counter->hw.idx = idx;
		write_pmc(idx, val);
		perf_counter_update_userpage(counter);
	}
	cpuhw->n_limited = n_lim;
	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;

 out_enable:
	mb();
	write_mmcr0(cpuhw, cpuhw->mmcr[0]);

	/*
	 * Enable instruction sampling if necessary
	 */
	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
		mb();
		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
	}

 out:
	local_irq_restore(flags);
}
578 | |||
579 | static int collect_events(struct perf_counter *group, int max_count, | ||
580 | struct perf_counter *ctrs[], u64 *events, | ||
581 | unsigned int *flags) | ||
582 | { | ||
583 | int n = 0; | ||
584 | struct perf_counter *counter; | ||
585 | |||
586 | if (!is_software_counter(group)) { | ||
587 | if (n >= max_count) | ||
588 | return -1; | ||
589 | ctrs[n] = group; | ||
590 | flags[n] = group->hw.counter_base; | ||
591 | events[n++] = group->hw.config; | ||
592 | } | ||
593 | list_for_each_entry(counter, &group->sibling_list, list_entry) { | ||
594 | if (!is_software_counter(counter) && | ||
595 | counter->state != PERF_COUNTER_STATE_OFF) { | ||
596 | if (n >= max_count) | ||
597 | return -1; | ||
598 | ctrs[n] = counter; | ||
599 | flags[n] = counter->hw.counter_base; | ||
600 | events[n++] = counter->hw.config; | ||
601 | } | ||
602 | } | ||
603 | return n; | ||
604 | } | ||
605 | |||
606 | static void counter_sched_in(struct perf_counter *counter, int cpu) | ||
607 | { | ||
608 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
609 | counter->oncpu = cpu; | ||
610 | counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; | ||
611 | if (is_software_counter(counter)) | ||
612 | counter->pmu->enable(counter); | ||
613 | } | ||
614 | |||
615 | /* | ||
616 | * Called to enable a whole group of counters. | ||
617 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
618 | * Assumes the caller has disabled interrupts and has | ||
619 | * frozen the PMU with hw_perf_save_disable. | ||
620 | */ | ||
621 | int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
622 | struct perf_cpu_context *cpuctx, | ||
623 | struct perf_counter_context *ctx, int cpu) | ||
624 | { | ||
625 | struct cpu_hw_counters *cpuhw; | ||
626 | long i, n, n0; | ||
627 | struct perf_counter *sub; | ||
628 | |||
629 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
630 | n0 = cpuhw->n_counters; | ||
631 | n = collect_events(group_leader, ppmu->n_counter - n0, | ||
632 | &cpuhw->counter[n0], &cpuhw->events[n0], | ||
633 | &cpuhw->flags[n0]); | ||
634 | if (n < 0) | ||
635 | return -EAGAIN; | ||
636 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) | ||
637 | return -EAGAIN; | ||
638 | i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); | ||
639 | if (i < 0) | ||
640 | return -EAGAIN; | ||
641 | cpuhw->n_counters = n0 + n; | ||
642 | cpuhw->n_added += n; | ||
643 | |||
644 | /* | ||
645 | * OK, this group can go on; update counter states etc., | ||
646 | * and enable any software counters | ||
647 | */ | ||
648 | for (i = n0; i < n0 + n; ++i) | ||
649 | cpuhw->counter[i]->hw.config = cpuhw->events[i]; | ||
650 | cpuctx->active_oncpu += n; | ||
651 | n = 1; | ||
652 | counter_sched_in(group_leader, cpu); | ||
653 | list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { | ||
654 | if (sub->state != PERF_COUNTER_STATE_OFF) { | ||
655 | counter_sched_in(sub, cpu); | ||
656 | ++n; | ||
657 | } | ||
658 | } | ||
659 | ctx->nr_active += n; | ||
660 | |||
661 | return 1; | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * Add a counter to the PMU. | ||
666 | * If all counters are not already frozen, then we disable and | ||
667 | * re-enable the PMU in order to get hw_perf_enable to do the | ||
668 | * actual work of reconfiguring the PMU. | ||
669 | */ | ||
670 | static int power_pmu_enable(struct perf_counter *counter) | ||
671 | { | ||
672 | struct cpu_hw_counters *cpuhw; | ||
673 | unsigned long flags; | ||
674 | int n0; | ||
675 | int ret = -EAGAIN; | ||
676 | |||
677 | local_irq_save(flags); | ||
678 | perf_disable(); | ||
679 | |||
680 | /* | ||
681 | * Add the counter to the list (if there is room) | ||
682 | * and check whether the total set is still feasible. | ||
683 | */ | ||
684 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
685 | n0 = cpuhw->n_counters; | ||
686 | if (n0 >= ppmu->n_counter) | ||
687 | goto out; | ||
688 | cpuhw->counter[n0] = counter; | ||
689 | cpuhw->events[n0] = counter->hw.config; | ||
690 | cpuhw->flags[n0] = counter->hw.counter_base; | ||
691 | if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) | ||
692 | goto out; | ||
693 | if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) | ||
694 | goto out; | ||
695 | |||
696 | counter->hw.config = cpuhw->events[n0]; | ||
697 | ++cpuhw->n_counters; | ||
698 | ++cpuhw->n_added; | ||
699 | |||
700 | ret = 0; | ||
701 | out: | ||
702 | perf_enable(); | ||
703 | local_irq_restore(flags); | ||
704 | return ret; | ||
705 | } | ||
706 | |||
707 | /* | ||
708 | * Remove a counter from the PMU. | ||
709 | */ | ||
710 | static void power_pmu_disable(struct perf_counter *counter) | ||
711 | { | ||
712 | struct cpu_hw_counters *cpuhw; | ||
713 | long i; | ||
714 | unsigned long flags; | ||
715 | |||
716 | local_irq_save(flags); | ||
717 | perf_disable(); | ||
718 | |||
719 | power_pmu_read(counter); | ||
720 | |||
721 | cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
722 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
723 | if (counter == cpuhw->counter[i]) { | ||
724 | while (++i < cpuhw->n_counters) | ||
725 | cpuhw->counter[i-1] = cpuhw->counter[i]; | ||
726 | --cpuhw->n_counters; | ||
727 | ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); | ||
728 | if (counter->hw.idx) { | ||
729 | write_pmc(counter->hw.idx, 0); | ||
730 | counter->hw.idx = 0; | ||
731 | } | ||
732 | perf_counter_update_userpage(counter); | ||
733 | break; | ||
734 | } | ||
735 | } | ||
736 | for (i = 0; i < cpuhw->n_limited; ++i) | ||
737 | if (counter == cpuhw->limited_counter[i]) | ||
738 | break; | ||
739 | if (i < cpuhw->n_limited) { | ||
740 | while (++i < cpuhw->n_limited) { | ||
741 | cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; | ||
742 | cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; | ||
743 | } | ||
744 | --cpuhw->n_limited; | ||
745 | } | ||
746 | if (cpuhw->n_counters == 0) { | ||
747 | /* disable exceptions if no counters are running */ | ||
748 | cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); | ||
749 | } | ||
750 | |||
751 | perf_enable(); | ||
752 | local_irq_restore(flags); | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Re-enable interrupts on a counter after they were throttled | ||
757 | * because they were coming too fast. | ||
758 | */ | ||
759 | static void power_pmu_unthrottle(struct perf_counter *counter) | ||
760 | { | ||
761 | s64 val, left; | ||
762 | unsigned long flags; | ||
763 | |||
764 | if (!counter->hw.idx || !counter->hw.sample_period) | ||
765 | return; | ||
766 | local_irq_save(flags); | ||
767 | perf_disable(); | ||
768 | power_pmu_read(counter); | ||
769 | left = counter->hw.sample_period; | ||
770 | counter->hw.last_period = left; | ||
771 | val = 0; | ||
772 | if (left < 0x80000000L) | ||
773 | val = 0x80000000L - left; | ||
774 | write_pmc(counter->hw.idx, val); | ||
775 | atomic64_set(&counter->hw.prev_count, val); | ||
776 | atomic64_set(&counter->hw.period_left, left); | ||
777 | perf_counter_update_userpage(counter); | ||
778 | perf_enable(); | ||
779 | local_irq_restore(flags); | ||
780 | } | ||
781 | |||
/*
 * PMU operations for hardware counters; hw_perf_counter_init()
 * returns a pointer to this structure on success.
 */
struct pmu power_pmu = {
	.enable		= power_pmu_enable,
	.disable	= power_pmu_disable,
	.read		= power_pmu_read,
	.unthrottle	= power_pmu_unthrottle,
};
788 | |||
789 | /* | ||
790 | * Return 1 if we might be able to put counter on a limited PMC, | ||
791 | * or 0 if not. | ||
792 | * A counter can only go on a limited PMC if it counts something | ||
793 | * that a limited PMC can count, doesn't require interrupts, and | ||
794 | * doesn't exclude any processor mode. | ||
795 | */ | ||
796 | static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, | ||
797 | unsigned int flags) | ||
798 | { | ||
799 | int n; | ||
800 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
801 | |||
802 | if (counter->attr.exclude_user | ||
803 | || counter->attr.exclude_kernel | ||
804 | || counter->attr.exclude_hv | ||
805 | || counter->attr.sample_period) | ||
806 | return 0; | ||
807 | |||
808 | if (ppmu->limited_pmc_event(ev)) | ||
809 | return 1; | ||
810 | |||
811 | /* | ||
812 | * The requested event isn't on a limited PMC already; | ||
813 | * see if any alternative code goes on a limited PMC. | ||
814 | */ | ||
815 | if (!ppmu->get_alternatives) | ||
816 | return 0; | ||
817 | |||
818 | flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; | ||
819 | n = ppmu->get_alternatives(ev, flags, alt); | ||
820 | |||
821 | return n > 0; | ||
822 | } | ||
823 | |||
824 | /* | ||
825 | * Find an alternative event that goes on a normal PMC, if possible, | ||
826 | * and return the event code, or 0 if there is no such alternative. | ||
827 | * (Note: event code 0 is "don't count" on all machines.) | ||
828 | */ | ||
829 | static u64 normal_pmc_alternative(u64 ev, unsigned long flags) | ||
830 | { | ||
831 | u64 alt[MAX_EVENT_ALTERNATIVES]; | ||
832 | int n; | ||
833 | |||
834 | flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); | ||
835 | n = ppmu->get_alternatives(ev, flags, alt); | ||
836 | if (!n) | ||
837 | return 0; | ||
838 | return alt[0]; | ||
839 | } | ||
840 | |||
/*
 * Number of perf_counters counting hardware events.
 * Incremented in hw_perf_counter_init() and decremented in
 * hw_perf_counter_destroy().
 */
static atomic_t num_counters;
/*
 * Used to avoid races in calling reserve/release_pmc_hardware:
 * held around the 0 <-> 1 transitions of num_counters.
 */
static DEFINE_MUTEX(pmc_reserve_mutex);
845 | |||
846 | /* | ||
847 | * Release the PMU if this is the last perf_counter. | ||
848 | */ | ||
849 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
850 | { | ||
851 | if (!atomic_add_unless(&num_counters, -1, 1)) { | ||
852 | mutex_lock(&pmc_reserve_mutex); | ||
853 | if (atomic_dec_return(&num_counters) == 0) | ||
854 | release_pmc_hardware(); | ||
855 | mutex_unlock(&pmc_reserve_mutex); | ||
856 | } | ||
857 | } | ||
858 | |||
859 | /* | ||
860 | * Translate a generic cache event config to a raw event code. | ||
861 | */ | ||
862 | static int hw_perf_cache_event(u64 config, u64 *eventp) | ||
863 | { | ||
864 | unsigned long type, op, result; | ||
865 | int ev; | ||
866 | |||
867 | if (!ppmu->cache_events) | ||
868 | return -EINVAL; | ||
869 | |||
870 | /* unpack config */ | ||
871 | type = config & 0xff; | ||
872 | op = (config >> 8) & 0xff; | ||
873 | result = (config >> 16) & 0xff; | ||
874 | |||
875 | if (type >= PERF_COUNT_HW_CACHE_MAX || | ||
876 | op >= PERF_COUNT_HW_CACHE_OP_MAX || | ||
877 | result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
878 | return -EINVAL; | ||
879 | |||
880 | ev = (*ppmu->cache_events)[type][op][result]; | ||
881 | if (ev == 0) | ||
882 | return -EOPNOTSUPP; | ||
883 | if (ev == -1) | ||
884 | return -EINVAL; | ||
885 | *eventp = ev; | ||
886 | return 0; | ||
887 | } | ||
888 | |||
889 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
890 | { | ||
891 | u64 ev; | ||
892 | unsigned long flags; | ||
893 | struct perf_counter *ctrs[MAX_HWCOUNTERS]; | ||
894 | u64 events[MAX_HWCOUNTERS]; | ||
895 | unsigned int cflags[MAX_HWCOUNTERS]; | ||
896 | int n; | ||
897 | int err; | ||
898 | |||
899 | if (!ppmu) | ||
900 | return ERR_PTR(-ENXIO); | ||
901 | switch (counter->attr.type) { | ||
902 | case PERF_TYPE_HARDWARE: | ||
903 | ev = counter->attr.config; | ||
904 | if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) | ||
905 | return ERR_PTR(-EOPNOTSUPP); | ||
906 | ev = ppmu->generic_events[ev]; | ||
907 | break; | ||
908 | case PERF_TYPE_HW_CACHE: | ||
909 | err = hw_perf_cache_event(counter->attr.config, &ev); | ||
910 | if (err) | ||
911 | return ERR_PTR(err); | ||
912 | break; | ||
913 | case PERF_TYPE_RAW: | ||
914 | ev = counter->attr.config; | ||
915 | break; | ||
916 | } | ||
917 | counter->hw.config_base = ev; | ||
918 | counter->hw.idx = 0; | ||
919 | |||
920 | /* | ||
921 | * If we are not running on a hypervisor, force the | ||
922 | * exclude_hv bit to 0 so that we don't care what | ||
923 | * the user set it to. | ||
924 | */ | ||
925 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | ||
926 | counter->attr.exclude_hv = 0; | ||
927 | |||
928 | /* | ||
929 | * If this is a per-task counter, then we can use | ||
930 | * PM_RUN_* events interchangeably with their non RUN_* | ||
931 | * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. | ||
932 | * XXX we should check if the task is an idle task. | ||
933 | */ | ||
934 | flags = 0; | ||
935 | if (counter->ctx->task) | ||
936 | flags |= PPMU_ONLY_COUNT_RUN; | ||
937 | |||
938 | /* | ||
939 | * If this machine has limited counters, check whether this | ||
940 | * event could go on a limited counter. | ||
941 | */ | ||
942 | if (ppmu->flags & PPMU_LIMITED_PMC5_6) { | ||
943 | if (can_go_on_limited_pmc(counter, ev, flags)) { | ||
944 | flags |= PPMU_LIMITED_PMC_OK; | ||
945 | } else if (ppmu->limited_pmc_event(ev)) { | ||
946 | /* | ||
947 | * The requested event is on a limited PMC, | ||
948 | * but we can't use a limited PMC; see if any | ||
949 | * alternative goes on a normal PMC. | ||
950 | */ | ||
951 | ev = normal_pmc_alternative(ev, flags); | ||
952 | if (!ev) | ||
953 | return ERR_PTR(-EINVAL); | ||
954 | } | ||
955 | } | ||
956 | |||
957 | /* | ||
958 | * If this is in a group, check if it can go on with all the | ||
959 | * other hardware counters in the group. We assume the counter | ||
960 | * hasn't been linked into its leader's sibling list at this point. | ||
961 | */ | ||
962 | n = 0; | ||
963 | if (counter->group_leader != counter) { | ||
964 | n = collect_events(counter->group_leader, ppmu->n_counter - 1, | ||
965 | ctrs, events, cflags); | ||
966 | if (n < 0) | ||
967 | return ERR_PTR(-EINVAL); | ||
968 | } | ||
969 | events[n] = ev; | ||
970 | ctrs[n] = counter; | ||
971 | cflags[n] = flags; | ||
972 | if (check_excludes(ctrs, cflags, n, 1)) | ||
973 | return ERR_PTR(-EINVAL); | ||
974 | if (power_check_constraints(events, cflags, n + 1)) | ||
975 | return ERR_PTR(-EINVAL); | ||
976 | |||
977 | counter->hw.config = events[n]; | ||
978 | counter->hw.counter_base = cflags[n]; | ||
979 | counter->hw.last_period = counter->hw.sample_period; | ||
980 | atomic64_set(&counter->hw.period_left, counter->hw.last_period); | ||
981 | |||
982 | /* | ||
983 | * See if we need to reserve the PMU. | ||
984 | * If no counters are currently in use, then we have to take a | ||
985 | * mutex to ensure that we don't race with another task doing | ||
986 | * reserve_pmc_hardware or release_pmc_hardware. | ||
987 | */ | ||
988 | err = 0; | ||
989 | if (!atomic_inc_not_zero(&num_counters)) { | ||
990 | mutex_lock(&pmc_reserve_mutex); | ||
991 | if (atomic_read(&num_counters) == 0 && | ||
992 | reserve_pmc_hardware(perf_counter_interrupt)) | ||
993 | err = -EBUSY; | ||
994 | else | ||
995 | atomic_inc(&num_counters); | ||
996 | mutex_unlock(&pmc_reserve_mutex); | ||
997 | } | ||
998 | counter->destroy = hw_perf_counter_destroy; | ||
999 | |||
1000 | if (err) | ||
1001 | return ERR_PTR(err); | ||
1002 | return &power_pmu; | ||
1003 | } | ||
1004 | |||
/*
 * A counter has overflowed; update its count and record
 * things if requested.  Note that interrupts are hard-disabled
 * here so there is no possibility of being interrupted.
 *
 * @counter: the counter whose PMC overflowed
 * @val:     the value read from that PMC
 * @regs:    interrupt registers; regs->dsisr is expected to hold the
 *           MMCRA value saved by the interrupt handler
 * @nmi:     passed through to perf_counter_overflow()
 */
static void record_and_restart(struct perf_counter *counter, long val,
			       struct pt_regs *regs, int nmi)
{
	u64 period = counter->hw.sample_period;
	s64 prev, delta, left;
	int record = 0;
	u64 addr, mmcra, sdsync;

	/* we don't have to worry about interrupts here */
	prev = atomic64_read(&counter->hw.prev_count);
	delta = (val - prev) & 0xfffffffful;
	atomic64_add(delta, &counter->count);

	/*
	 * See if the total period for this counter has expired,
	 * and update for the next period.
	 * From here on `val' is the value the PMC will be restarted with.
	 */
	val = 0;
	left = atomic64_read(&counter->hw.period_left) - delta;
	if (period) {
		if (left <= 0) {
			left += period;
			/* Overshot by a whole period or more: start afresh. */
			if (left <= 0)
				left = period;
			record = 1;
		}
		if (left < 0x80000000L)
			val = 0x80000000L - left;
	}

	/*
	 * Finally record data if requested.
	 */
	if (record) {
		struct perf_sample_data data = {
			.regs	= regs,
			.addr	= 0,
			.period	= counter->hw.last_period,
		};

		if (counter->attr.sample_type & PERF_SAMPLE_ADDR) {
			/*
			 * The user wants a data address recorded.
			 * If we're not doing instruction sampling,
			 * give them the SDAR (sampled data address).
			 * If we are doing instruction sampling, then only
			 * give them the SDAR if it corresponds to the
			 * instruction pointed to by SIAR; this is indicated
			 * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA.
			 */
			mmcra = regs->dsisr;
			sdsync = (ppmu->flags & PPMU_ALT_SIPR) ?
				POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC;
			if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync))
				data.addr = mfspr(SPRN_SDAR);
		}
		if (perf_counter_overflow(counter, nmi, &data)) {
			/*
			 * Interrupts are coming too fast - throttle them
			 * by setting the counter to 0, so it will be
			 * at least 2^30 cycles until the next interrupt
			 * (assuming each counter counts at most 2 counts
			 * per cycle).
			 */
			val = 0;
			left = ~0ULL >> 1;
		}
	}

	/* Restart the PMC and publish the new baseline. */
	write_pmc(counter->hw.idx, val);
	atomic64_set(&counter->hw.prev_count, val);
	atomic64_set(&counter->hw.period_left, left);
	perf_counter_update_userpage(counter);
}
1084 | |||
1085 | /* | ||
1086 | * Called from generic code to get the misc flags (i.e. processor mode) | ||
1087 | * for an event. | ||
1088 | */ | ||
1089 | unsigned long perf_misc_flags(struct pt_regs *regs) | ||
1090 | { | ||
1091 | unsigned long mmcra; | ||
1092 | |||
1093 | if (TRAP(regs) != 0xf00) { | ||
1094 | /* not a PMU interrupt */ | ||
1095 | return user_mode(regs) ? PERF_EVENT_MISC_USER : | ||
1096 | PERF_EVENT_MISC_KERNEL; | ||
1097 | } | ||
1098 | |||
1099 | mmcra = regs->dsisr; | ||
1100 | if (ppmu->flags & PPMU_ALT_SIPR) { | ||
1101 | if (mmcra & POWER6_MMCRA_SIHV) | ||
1102 | return PERF_EVENT_MISC_HYPERVISOR; | ||
1103 | return (mmcra & POWER6_MMCRA_SIPR) ? PERF_EVENT_MISC_USER : | ||
1104 | PERF_EVENT_MISC_KERNEL; | ||
1105 | } | ||
1106 | if (mmcra & MMCRA_SIHV) | ||
1107 | return PERF_EVENT_MISC_HYPERVISOR; | ||
1108 | return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : | ||
1109 | PERF_EVENT_MISC_KERNEL; | ||
1110 | } | ||
1111 | |||
1112 | /* | ||
1113 | * Called from generic code to get the instruction pointer | ||
1114 | * for an event. | ||
1115 | */ | ||
1116 | unsigned long perf_instruction_pointer(struct pt_regs *regs) | ||
1117 | { | ||
1118 | unsigned long mmcra; | ||
1119 | unsigned long ip; | ||
1120 | unsigned long slot; | ||
1121 | |||
1122 | if (TRAP(regs) != 0xf00) | ||
1123 | return regs->nip; /* not a PMU interrupt */ | ||
1124 | |||
1125 | ip = mfspr(SPRN_SIAR); | ||
1126 | mmcra = regs->dsisr; | ||
1127 | if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { | ||
1128 | slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; | ||
1129 | if (slot > 1) | ||
1130 | ip += 4 * (slot - 1); | ||
1131 | } | ||
1132 | return ip; | ||
1133 | } | ||
1134 | |||
1135 | /* | ||
1136 | * Performance monitor interrupt stuff | ||
1137 | */ | ||
1138 | static void perf_counter_interrupt(struct pt_regs *regs) | ||
1139 | { | ||
1140 | int i; | ||
1141 | struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); | ||
1142 | struct perf_counter *counter; | ||
1143 | long val; | ||
1144 | int found = 0; | ||
1145 | int nmi; | ||
1146 | |||
1147 | if (cpuhw->n_limited) | ||
1148 | freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), | ||
1149 | mfspr(SPRN_PMC6)); | ||
1150 | |||
1151 | /* | ||
1152 | * Overload regs->dsisr to store MMCRA so we only need to read it once. | ||
1153 | */ | ||
1154 | regs->dsisr = mfspr(SPRN_MMCRA); | ||
1155 | |||
1156 | /* | ||
1157 | * If interrupts were soft-disabled when this PMU interrupt | ||
1158 | * occurred, treat it as an NMI. | ||
1159 | */ | ||
1160 | nmi = !regs->softe; | ||
1161 | if (nmi) | ||
1162 | nmi_enter(); | ||
1163 | else | ||
1164 | irq_enter(); | ||
1165 | |||
1166 | for (i = 0; i < cpuhw->n_counters; ++i) { | ||
1167 | counter = cpuhw->counter[i]; | ||
1168 | if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) | ||
1169 | continue; | ||
1170 | val = read_pmc(counter->hw.idx); | ||
1171 | if ((int)val < 0) { | ||
1172 | /* counter has overflowed */ | ||
1173 | found = 1; | ||
1174 | record_and_restart(counter, val, regs, nmi); | ||
1175 | } | ||
1176 | } | ||
1177 | |||
1178 | /* | ||
1179 | * In case we didn't find and reset the counter that caused | ||
1180 | * the interrupt, scan all counters and reset any that are | ||
1181 | * negative, to avoid getting continual interrupts. | ||
1182 | * Any that we processed in the previous loop will not be negative. | ||
1183 | */ | ||
1184 | if (!found) { | ||
1185 | for (i = 0; i < ppmu->n_counter; ++i) { | ||
1186 | if (is_limited_pmc(i + 1)) | ||
1187 | continue; | ||
1188 | val = read_pmc(i + 1); | ||
1189 | if ((int)val < 0) | ||
1190 | write_pmc(i + 1, 0); | ||
1191 | } | ||
1192 | } | ||
1193 | |||
1194 | /* | ||
1195 | * Reset MMCR0 to its normal value. This will set PMXE and | ||
1196 | * clear FC (freeze counters) and PMAO (perf mon alert occurred) | ||
1197 | * and thus allow interrupts to occur again. | ||
1198 | * XXX might want to use MSR.PM to keep the counters frozen until | ||
1199 | * we get back out of this interrupt. | ||
1200 | */ | ||
1201 | write_mmcr0(cpuhw, cpuhw->mmcr[0]); | ||
1202 | |||
1203 | if (nmi) | ||
1204 | nmi_exit(); | ||
1205 | else | ||
1206 | irq_exit(); | ||
1207 | } | ||
1208 | |||
1209 | void hw_perf_counter_setup(int cpu) | ||
1210 | { | ||
1211 | struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); | ||
1212 | |||
1213 | memset(cpuhw, 0, sizeof(*cpuhw)); | ||
1214 | cpuhw->mmcr[0] = MMCR0_FC; | ||
1215 | } | ||
1216 | |||
1217 | extern struct power_pmu power4_pmu; | ||
1218 | extern struct power_pmu ppc970_pmu; | ||
1219 | extern struct power_pmu power5_pmu; | ||
1220 | extern struct power_pmu power5p_pmu; | ||
1221 | extern struct power_pmu power6_pmu; | ||
1222 | extern struct power_pmu power7_pmu; | ||
1223 | |||
1224 | static int init_perf_counters(void) | ||
1225 | { | ||
1226 | unsigned long pvr; | ||
1227 | |||
1228 | /* XXX should get this from cputable */ | ||
1229 | pvr = mfspr(SPRN_PVR); | ||
1230 | switch (PVR_VER(pvr)) { | ||
1231 | case PV_POWER4: | ||
1232 | case PV_POWER4p: | ||
1233 | ppmu = &power4_pmu; | ||
1234 | break; | ||
1235 | case PV_970: | ||
1236 | case PV_970FX: | ||
1237 | case PV_970MP: | ||
1238 | ppmu = &ppc970_pmu; | ||
1239 | break; | ||
1240 | case PV_POWER5: | ||
1241 | ppmu = &power5_pmu; | ||
1242 | break; | ||
1243 | case PV_POWER5p: | ||
1244 | ppmu = &power5p_pmu; | ||
1245 | break; | ||
1246 | case 0x3e: | ||
1247 | ppmu = &power6_pmu; | ||
1248 | break; | ||
1249 | case 0x3f: | ||
1250 | ppmu = &power7_pmu; | ||
1251 | break; | ||
1252 | } | ||
1253 | |||
1254 | /* | ||
1255 | * Use FCHV to ignore kernel events if MSR.HV is set. | ||
1256 | */ | ||
1257 | if (mfmsr() & MSR_HV) | ||
1258 | freeze_counters_kernel = MMCR0_FCHV; | ||
1259 | |||
1260 | return 0; | ||
1261 | } | ||
1262 | |||
1263 | arch_initcall(init_perf_counters); | ||
diff --git a/arch/powerpc/kernel/power4-pmu.c b/arch/powerpc/kernel/power4-pmu.c new file mode 100644 index 000000000000..07bd308a5fa7 --- /dev/null +++ b/arch/powerpc/kernel/power4-pmu.c | |||
@@ -0,0 +1,598 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER4 | ||
17 | */ | ||
18 | #define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ | ||
21 | #define PM_UNIT_MSK 0xf | ||
22 | #define PM_LOWER_SH 6 | ||
23 | #define PM_LOWER_MSK 1 | ||
24 | #define PM_LOWER_MSKS 0x40 | ||
25 | #define PM_BYTE_SH 4 /* Byte number of event bus to use */ | ||
26 | #define PM_BYTE_MSK 3 | ||
27 | #define PM_PMCSEL_MSK 7 | ||
28 | |||
29 | /* | ||
30 | * Unit code values | ||
31 | */ | ||
32 | #define PM_FPU 1 | ||
33 | #define PM_ISU1 2 | ||
34 | #define PM_IFU 3 | ||
35 | #define PM_IDU0 4 | ||
36 | #define PM_ISU1_ALT 6 | ||
37 | #define PM_ISU2 7 | ||
38 | #define PM_IFU_ALT 8 | ||
39 | #define PM_LSU0 9 | ||
40 | #define PM_LSU1 0xc | ||
41 | #define PM_GPS 0xf | ||
42 | |||
43 | /* | ||
44 | * Bits in MMCR0 for POWER4 | ||
45 | */ | ||
46 | #define MMCR0_PMC1SEL_SH 8 | ||
47 | #define MMCR0_PMC2SEL_SH 1 | ||
48 | #define MMCR_PMCSEL_MSK 0x1f | ||
49 | |||
50 | /* | ||
51 | * Bits in MMCR1 for POWER4 | ||
52 | */ | ||
53 | #define MMCR1_TTM0SEL_SH 62 | ||
54 | #define MMCR1_TTC0SEL_SH 61 | ||
55 | #define MMCR1_TTM1SEL_SH 59 | ||
56 | #define MMCR1_TTC1SEL_SH 58 | ||
57 | #define MMCR1_TTM2SEL_SH 56 | ||
58 | #define MMCR1_TTC2SEL_SH 55 | ||
59 | #define MMCR1_TTM3SEL_SH 53 | ||
60 | #define MMCR1_TTC3SEL_SH 52 | ||
61 | #define MMCR1_TTMSEL_MSK 3 | ||
62 | #define MMCR1_TD_CP_DBG0SEL_SH 50 | ||
63 | #define MMCR1_TD_CP_DBG1SEL_SH 48 | ||
64 | #define MMCR1_TD_CP_DBG2SEL_SH 46 | ||
65 | #define MMCR1_TD_CP_DBG3SEL_SH 44 | ||
66 | #define MMCR1_DEBUG0SEL_SH 43 | ||
67 | #define MMCR1_DEBUG1SEL_SH 42 | ||
68 | #define MMCR1_DEBUG2SEL_SH 41 | ||
69 | #define MMCR1_DEBUG3SEL_SH 40 | ||
70 | #define MMCR1_PMC1_ADDER_SEL_SH 39 | ||
71 | #define MMCR1_PMC2_ADDER_SEL_SH 38 | ||
72 | #define MMCR1_PMC6_ADDER_SEL_SH 37 | ||
73 | #define MMCR1_PMC5_ADDER_SEL_SH 36 | ||
74 | #define MMCR1_PMC8_ADDER_SEL_SH 35 | ||
75 | #define MMCR1_PMC7_ADDER_SEL_SH 34 | ||
76 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
77 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
78 | #define MMCR1_PMC3SEL_SH 27 | ||
79 | #define MMCR1_PMC4SEL_SH 22 | ||
80 | #define MMCR1_PMC5SEL_SH 17 | ||
81 | #define MMCR1_PMC6SEL_SH 12 | ||
82 | #define MMCR1_PMC7SEL_SH 7 | ||
83 | #define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ | ||
84 | |||
85 | static short mmcr1_adder_bits[8] = { | ||
86 | MMCR1_PMC1_ADDER_SEL_SH, | ||
87 | MMCR1_PMC2_ADDER_SEL_SH, | ||
88 | MMCR1_PMC3_ADDER_SEL_SH, | ||
89 | MMCR1_PMC4_ADDER_SEL_SH, | ||
90 | MMCR1_PMC5_ADDER_SEL_SH, | ||
91 | MMCR1_PMC6_ADDER_SEL_SH, | ||
92 | MMCR1_PMC7_ADDER_SEL_SH, | ||
93 | MMCR1_PMC8_ADDER_SEL_SH | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * Bits in MMCRA | ||
98 | */ | ||
99 | #define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ | ||
100 | |||
101 | /* | ||
102 | * Layout of constraint bits: | ||
103 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
104 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
105 | * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> | ||
106 | * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 | ||
107 | * \SMPL ||\TTC3SEL | ||
108 | * |\TTC_IFU_SEL | ||
109 | * \TTM2SEL0 | ||
110 | * | ||
111 | * SMPL - SAMPLE_ENABLE constraint | ||
112 | * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 | ||
113 | * | ||
114 | * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 | ||
115 | * 55: UC1 error 0x0080_0000_0000_0000 | ||
116 | * 54: FPU events needed 0x0040_0000_0000_0000 | ||
117 | * 53: ISU1 events needed 0x0020_0000_0000_0000 | ||
118 | * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 | ||
119 | * | ||
120 | * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 | ||
121 | * 51: UC2 error 0x0008_0000_0000_0000 | ||
122 | * 50: FPU events needed 0x0004_0000_0000_0000 | ||
123 | * 49: IFU events needed 0x0002_0000_0000_0000 | ||
124 | * 48: LSU0 events needed 0x0001_0000_0000_0000 | ||
125 | * | ||
126 | * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 | ||
127 | * 47: UC3 error 0x8000_0000_0000 | ||
128 | * 46: LSU0 events needed 0x4000_0000_0000 | ||
129 | * 45: IFU events needed 0x2000_0000_0000 | ||
130 | * 44: IDU0|ISU2 events needed 0x1000_0000_0000 | ||
131 | * 43: ISU1 events needed 0x0800_0000_0000 | ||
132 | * | ||
133 | * TTM2SEL0 | ||
134 | * 42: 0 = IDU0 events needed | ||
135 | * 1 = ISU2 events needed 0x0400_0000_0000 | ||
136 | * | ||
137 | * TTC_IFU_SEL | ||
138 | * 41: 0 = IFU.U events needed | ||
139 | * 1 = IFU.L events needed 0x0200_0000_0000 | ||
140 | * | ||
141 | * TTC3SEL | ||
142 | * 40: 0 = LSU1.U events needed | ||
143 | * 1 = LSU1.L events needed 0x0100_0000_0000 | ||
144 | * | ||
145 | * PS1 | ||
146 | * 39: PS1 error 0x0080_0000_0000 | ||
147 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
148 | * | ||
149 | * PS2 | ||
150 | * 35: PS2 error 0x0008_0000_0000 | ||
151 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
152 | * | ||
153 | * B0 | ||
154 | * 28-31: Byte 0 event source 0xf000_0000 | ||
155 | * 1 = FPU | ||
156 | * 2 = ISU1 | ||
157 | * 3 = IFU | ||
158 | * 4 = IDU0 | ||
159 | * 7 = ISU2 | ||
160 | * 9 = LSU0 | ||
161 | * c = LSU1 | ||
162 | * f = GPS | ||
163 | * | ||
164 | * B1, B2, B3 | ||
165 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
166 | * | ||
167 | * P8 | ||
168 | * 15: P8 error 0x8000 | ||
169 | * 14: Count of events needing PMC8 | ||
170 | * | ||
171 | * P1..P7 | ||
172 | * 0-13: Count of events needing PMC1..PMC7 | ||
173 | * | ||
174 | * Note: this doesn't allow events using IFU.U to be combined with events | ||
175 | * using IFU.L, though that is feasible (using TTM0 and TTM2). However | ||
176 | * there are no listed events for IFU.L (they are debug events not | ||
177 | * verified for performance monitoring) so this shouldn't cause a | ||
178 | * problem. | ||
179 | */ | ||
180 | |||
181 | static struct unitinfo { | ||
182 | u64 value, mask; | ||
183 | int unit; | ||
184 | int lowerbit; | ||
185 | } p4_unitinfo[16] = { | ||
186 | [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, | ||
187 | [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, | ||
188 | [PM_ISU1_ALT] = | ||
189 | { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, | ||
190 | [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, | ||
191 | [PM_IFU_ALT] = | ||
192 | { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, | ||
193 | [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, | ||
194 | [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, | ||
195 | [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, | ||
196 | [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, | ||
197 | [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } | ||
198 | }; | ||
199 | |||
/*
 * For each PMC (0-based index), a bitmap of the PMCSEL values whose
 * direct event relates to marked instructions: bit n set means
 * PMCSEL == n is a marked-instruction event on that PMC.
 */
static unsigned char direct_marked_event[8] = {
	/* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	[0] = (1 << 2) | (1 << 3),
	/* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	[1] = (1 << 3) | (1 << 5),
	/* PMC3: PM_MRK_ST_CMPL_INT */
	[2] = (1 << 3),
	/* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	[3] = (1 << 4) | (1 << 5),
	/* PMC5: PM_MRK_GRP_TIMEO */
	[4] = (1 << 4) | (1 << 5),
	/* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */
	[5] = (1 << 3) | (1 << 4) | (1 << 5),
	/* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	[6] = (1 << 4) | (1 << 5),
	/* PMC8: PM_MRK_LSU_FIN */
	[7] = (1 << 4),
};
211 | |||
212 | /* | ||
213 | * Returns 1 if event counts things relating to marked instructions | ||
214 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
215 | */ | ||
216 | static int p4_marked_instr_event(u64 event) | ||
217 | { | ||
218 | int pmc, psel, unit, byte, bit; | ||
219 | unsigned int mask; | ||
220 | |||
221 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
222 | psel = event & PM_PMCSEL_MSK; | ||
223 | if (pmc) { | ||
224 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
225 | return 1; | ||
226 | if (psel == 0) /* add events */ | ||
227 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
228 | else if (psel == 6) /* decode events */ | ||
229 | bit = 4; | ||
230 | else | ||
231 | return 0; | ||
232 | } else | ||
233 | bit = psel; | ||
234 | |||
235 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
236 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
237 | mask = 0; | ||
238 | switch (unit) { | ||
239 | case PM_LSU1: | ||
240 | if (event & PM_LOWER_MSKS) | ||
241 | mask = 1 << 28; /* byte 7 bit 4 */ | ||
242 | else | ||
243 | mask = 6 << 24; /* byte 3 bits 1 and 2 */ | ||
244 | break; | ||
245 | case PM_LSU0: | ||
246 | /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ | ||
247 | mask = 0x083dff00; | ||
248 | } | ||
249 | return (mask >> (byte * 8 + bit)) & 1; | ||
250 | } | ||
251 | |||
252 | static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
253 | { | ||
254 | int pmc, byte, unit, lower, sh; | ||
255 | u64 mask = 0, value = 0; | ||
256 | int grp = -1; | ||
257 | |||
258 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
259 | if (pmc) { | ||
260 | if (pmc > 8) | ||
261 | return -1; | ||
262 | sh = (pmc - 1) * 2; | ||
263 | mask |= 2 << sh; | ||
264 | value |= 1 << sh; | ||
265 | grp = ((pmc - 1) >> 1) & 1; | ||
266 | } | ||
267 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
268 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
269 | if (unit) { | ||
270 | lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
271 | |||
272 | /* | ||
273 | * Bus events on bytes 0 and 2 can be counted | ||
274 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
275 | */ | ||
276 | if (!pmc) | ||
277 | grp = byte & 1; | ||
278 | |||
279 | if (!p4_unitinfo[unit].unit) | ||
280 | return -1; | ||
281 | mask |= p4_unitinfo[unit].mask; | ||
282 | value |= p4_unitinfo[unit].value; | ||
283 | sh = p4_unitinfo[unit].lowerbit; | ||
284 | if (sh > 1) | ||
285 | value |= (u64)lower << sh; | ||
286 | else if (lower != sh) | ||
287 | return -1; | ||
288 | unit = p4_unitinfo[unit].unit; | ||
289 | |||
290 | /* Set byte lane select field */ | ||
291 | mask |= 0xfULL << (28 - 4 * byte); | ||
292 | value |= (u64)unit << (28 - 4 * byte); | ||
293 | } | ||
294 | if (grp == 0) { | ||
295 | /* increment PMC1/2/5/6 field */ | ||
296 | mask |= 0x8000000000ull; | ||
297 | value |= 0x1000000000ull; | ||
298 | } else { | ||
299 | /* increment PMC3/4/7/8 field */ | ||
300 | mask |= 0x800000000ull; | ||
301 | value |= 0x100000000ull; | ||
302 | } | ||
303 | |||
304 | /* Marked instruction events need sample_enable set */ | ||
305 | if (p4_marked_instr_event(event)) { | ||
306 | mask |= 1ull << 56; | ||
307 | value |= 1ull << 56; | ||
308 | } | ||
309 | |||
310 | /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ | ||
311 | if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) | ||
312 | mask |= 1ull << 56; | ||
313 | |||
314 | *maskp = mask; | ||
315 | *valp = value; | ||
316 | return 0; | ||
317 | } | ||
318 | |||
/* Interchangeable event codes for PM_INST_CMPL, one per line. */
static unsigned int ppc_inst_cmpl[] = {
	0x1001,
	0x4001,
	0x6001,
	0x7001,
	0x8001,
};
322 | |||
323 | static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
324 | { | ||
325 | int i, j, na; | ||
326 | |||
327 | alt[0] = event; | ||
328 | na = 1; | ||
329 | |||
330 | /* 2 possibilities for PM_GRP_DISP_REJECT */ | ||
331 | if (event == 0x8003 || event == 0x0224) { | ||
332 | alt[1] = event ^ (0x8003 ^ 0x0224); | ||
333 | return 2; | ||
334 | } | ||
335 | |||
336 | /* 2 possibilities for PM_ST_MISS_L1 */ | ||
337 | if (event == 0x0c13 || event == 0x0c23) { | ||
338 | alt[1] = event ^ (0x0c13 ^ 0x0c23); | ||
339 | return 2; | ||
340 | } | ||
341 | |||
342 | /* several possibilities for PM_INST_CMPL */ | ||
343 | for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { | ||
344 | if (event == ppc_inst_cmpl[i]) { | ||
345 | for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) | ||
346 | if (j != i) | ||
347 | alt[na++] = ppc_inst_cmpl[j]; | ||
348 | break; | ||
349 | } | ||
350 | } | ||
351 | |||
352 | return na; | ||
353 | } | ||
354 | |||
355 | static int p4_compute_mmcr(u64 event[], int n_ev, | ||
356 | unsigned int hwc[], u64 mmcr[]) | ||
357 | { | ||
358 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
359 | unsigned int pmc, unit, byte, psel, lower; | ||
360 | unsigned int ttm, grp; | ||
361 | unsigned int pmc_inuse = 0; | ||
362 | unsigned int pmc_grp_use[2]; | ||
363 | unsigned char busbyte[4]; | ||
364 | unsigned char unituse[16]; | ||
365 | unsigned int unitlower = 0; | ||
366 | int i; | ||
367 | |||
368 | if (n_ev > 8) | ||
369 | return -1; | ||
370 | |||
371 | /* First pass to count resource use */ | ||
372 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
373 | memset(busbyte, 0, sizeof(busbyte)); | ||
374 | memset(unituse, 0, sizeof(unituse)); | ||
375 | for (i = 0; i < n_ev; ++i) { | ||
376 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
377 | if (pmc) { | ||
378 | if (pmc_inuse & (1 << (pmc - 1))) | ||
379 | return -1; | ||
380 | pmc_inuse |= 1 << (pmc - 1); | ||
381 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
382 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
383 | } | ||
384 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
385 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
386 | lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; | ||
387 | if (unit) { | ||
388 | if (!pmc) | ||
389 | ++pmc_grp_use[byte & 1]; | ||
390 | if (unit == 6 || unit == 8) | ||
391 | /* map alt ISU1/IFU codes: 6->2, 8->3 */ | ||
392 | unit = (unit >> 1) - 1; | ||
393 | if (busbyte[byte] && busbyte[byte] != unit) | ||
394 | return -1; | ||
395 | busbyte[byte] = unit; | ||
396 | lower <<= unit; | ||
397 | if (unituse[unit] && lower != (unitlower & lower)) | ||
398 | return -1; | ||
399 | unituse[unit] = 1; | ||
400 | unitlower |= lower; | ||
401 | } | ||
402 | } | ||
403 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
404 | return -1; | ||
405 | |||
406 | /* | ||
407 | * Assign resources and set multiplexer selects. | ||
408 | * | ||
409 | * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. | ||
410 | * Each TTMx can only select one unit, but since | ||
411 | * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, | ||
412 | * we have some choices. | ||
413 | */ | ||
414 | if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { | ||
415 | unituse[6] = 1; /* Move 2 to 6 */ | ||
416 | unituse[2] = 0; | ||
417 | } | ||
418 | if (unituse[3] & (unituse[1] | unituse[2])) { | ||
419 | unituse[8] = 1; /* Move 3 to 8 */ | ||
420 | unituse[3] = 0; | ||
421 | unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); | ||
422 | } | ||
423 | /* Check only one unit per TTMx */ | ||
424 | if (unituse[1] + unituse[2] + unituse[3] > 1 || | ||
425 | unituse[4] + unituse[6] + unituse[7] > 1 || | ||
426 | unituse[8] + unituse[9] > 1 || | ||
427 | (unituse[5] | unituse[10] | unituse[11] | | ||
428 | unituse[13] | unituse[14])) | ||
429 | return -1; | ||
430 | |||
431 | /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ | ||
432 | mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; | ||
433 | mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; | ||
434 | mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; | ||
435 | |||
436 | /* Set TTCxSEL fields. */ | ||
437 | if (unitlower & 0xe) | ||
438 | mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; | ||
439 | if (unitlower & 0xf0) | ||
440 | mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; | ||
441 | if (unitlower & 0xf00) | ||
442 | mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; | ||
443 | if (unitlower & 0x7000) | ||
444 | mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; | ||
445 | |||
446 | /* Set byte lane select fields. */ | ||
447 | for (byte = 0; byte < 4; ++byte) { | ||
448 | unit = busbyte[byte]; | ||
449 | if (!unit) | ||
450 | continue; | ||
451 | if (unit == 0xf) { | ||
452 | /* special case for GPS */ | ||
453 | mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); | ||
454 | } else { | ||
455 | if (!unituse[unit]) | ||
456 | ttm = unit - 1; /* 2->1, 3->2 */ | ||
457 | else | ||
458 | ttm = unit >> 2; | ||
459 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); | ||
460 | } | ||
461 | } | ||
462 | |||
463 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
464 | for (i = 0; i < n_ev; ++i) { | ||
465 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
466 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
467 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
468 | psel = event[i] & PM_PMCSEL_MSK; | ||
469 | if (!pmc) { | ||
470 | /* Bus event or 00xxx direct event (off or cycles) */ | ||
471 | if (unit) | ||
472 | psel |= 0x10 | ((byte & 2) << 2); | ||
473 | for (pmc = 0; pmc < 8; ++pmc) { | ||
474 | if (pmc_inuse & (1 << pmc)) | ||
475 | continue; | ||
476 | grp = (pmc >> 1) & 1; | ||
477 | if (unit) { | ||
478 | if (grp == (byte & 1)) | ||
479 | break; | ||
480 | } else if (pmc_grp_use[grp] < 4) { | ||
481 | ++pmc_grp_use[grp]; | ||
482 | break; | ||
483 | } | ||
484 | } | ||
485 | pmc_inuse |= 1 << pmc; | ||
486 | } else { | ||
487 | /* Direct event */ | ||
488 | --pmc; | ||
489 | if (psel == 0 && (byte & 2)) | ||
490 | /* add events on higher-numbered bus */ | ||
491 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
492 | else if (psel == 6 && byte == 3) | ||
493 | /* seem to need to set sample_enable here */ | ||
494 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
495 | psel |= 8; | ||
496 | } | ||
497 | if (pmc <= 1) | ||
498 | mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
499 | else | ||
500 | mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
501 | if (pmc == 7) /* PMC8 */ | ||
502 | mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; | ||
503 | hwc[i] = pmc; | ||
504 | if (p4_marked_instr_event(event[i])) | ||
505 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
506 | } | ||
507 | |||
508 | if (pmc_inuse & 1) | ||
509 | mmcr0 |= MMCR0_PMC1CE; | ||
510 | if (pmc_inuse & 0xfe) | ||
511 | mmcr0 |= MMCR0_PMCjCE; | ||
512 | |||
513 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
514 | |||
515 | /* Return MMCRx values */ | ||
516 | mmcr[0] = mmcr0; | ||
517 | mmcr[1] = mmcr1; | ||
518 | mmcr[2] = mmcra; | ||
519 | return 0; | ||
520 | } | ||
521 | |||
522 | static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
523 | { | ||
524 | /* | ||
525 | * Setting the PMCxSEL field to 0 disables PMC x. | ||
526 | * (Note that pmc is 0-based here, not 1-based.) | ||
527 | */ | ||
528 | if (pmc <= 1) { | ||
529 | mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); | ||
530 | } else { | ||
531 | mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); | ||
532 | if (pmc == 7) | ||
533 | mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); | ||
534 | } | ||
535 | } | ||
536 | |||
537 | static int p4_generic_events[] = { | ||
538 | [PERF_COUNT_HW_CPU_CYCLES] = 7, | ||
539 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, | ||
540 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ | ||
541 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ | ||
542 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ | ||
543 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ | ||
544 | }; | ||
545 | |||
546 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
547 | |||
548 | /* | ||
549 | * Table of generalized cache-related events. | ||
550 | * 0 means not supported, -1 means nonsensical, other values | ||
551 | * are event codes. | ||
552 | */ | ||
553 | static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
554 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
555 | [C(OP_READ)] = { 0x8c10, 0x3c10 }, | ||
556 | [C(OP_WRITE)] = { 0x7c10, 0xc13 }, | ||
557 | [C(OP_PREFETCH)] = { 0xc35, 0 }, | ||
558 | }, | ||
559 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
560 | [C(OP_READ)] = { 0, 0 }, | ||
561 | [C(OP_WRITE)] = { -1, -1 }, | ||
562 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
563 | }, | ||
564 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
565 | [C(OP_READ)] = { 0, 0 }, | ||
566 | [C(OP_WRITE)] = { 0, 0 }, | ||
567 | [C(OP_PREFETCH)] = { 0xc34, 0 }, | ||
568 | }, | ||
569 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
570 | [C(OP_READ)] = { 0, 0x904 }, | ||
571 | [C(OP_WRITE)] = { -1, -1 }, | ||
572 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
573 | }, | ||
574 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
575 | [C(OP_READ)] = { 0, 0x900 }, | ||
576 | [C(OP_WRITE)] = { -1, -1 }, | ||
577 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
578 | }, | ||
579 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
580 | [C(OP_READ)] = { 0x330, 0x331 }, | ||
581 | [C(OP_WRITE)] = { -1, -1 }, | ||
582 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
583 | }, | ||
584 | }; | ||
585 | |||
586 | struct power_pmu power4_pmu = { | ||
587 | .n_counter = 8, | ||
588 | .max_alternatives = 5, | ||
589 | .add_fields = 0x0000001100005555ull, | ||
590 | .test_adder = 0x0011083300000000ull, | ||
591 | .compute_mmcr = p4_compute_mmcr, | ||
592 | .get_constraint = p4_get_constraint, | ||
593 | .get_alternatives = p4_get_alternatives, | ||
594 | .disable_pmc = p4_disable_pmc, | ||
595 | .n_generic = ARRAY_SIZE(p4_generic_events), | ||
596 | .generic_events = p4_generic_events, | ||
597 | .cache_events = &power4_cache_events, | ||
598 | }; | ||
diff --git a/arch/powerpc/kernel/power5+-pmu.c b/arch/powerpc/kernel/power5+-pmu.c new file mode 100644 index 000000000000..41e5d2d958d4 --- /dev/null +++ b/arch/powerpc/kernel/power5+-pmu.c | |||
@@ -0,0 +1,671 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5+/++ (not POWER5) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5+ | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> | ||
82 | * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 | ||
83 | * | ||
84 | * NC - number of counters | ||
85 | * 51: NC error 0x0008_0000_0000_0000 | ||
86 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
87 | * | ||
88 | * G0..G3 - GRS mux constraints | ||
89 | * 46-47: GRS_L2SEL value | ||
90 | * 44-45: GRS_L3SEL value | ||
91 | * 41-43: GRS_MCSEL value | ||
92 | * 39-40: GRS_FABSEL value | ||
93 | * Note that these match up with their bit positions in MMCR1 | ||
94 | * | ||
95 | * T0 - TTM0 constraint | ||
96 | * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 | ||
97 | * | ||
98 | * T1 - TTM1 constraint | ||
99 | * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 33: UC3 error 0x02_0000_0000 | ||
103 | * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 | ||
104 | * 31: ISU0 events needed 0x00_8000_0000 | ||
105 | * 30: IDU|GRS events needed 0x00_4000_0000 | ||
106 | * | ||
107 | * B0 | ||
108 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
109 | * Encoding as for the event code | ||
110 | * | ||
111 | * B1, B2, B3 | ||
112 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
113 | * | ||
114 | * P6 | ||
115 | * 11: P6 error 0x800 | ||
116 | * 10: Count of events needing PMC6 | ||
117 | * | ||
118 | * P1..P5 | ||
119 | * 0-9: Count of events needing PMC1..PMC5 | ||
120 | */ | ||
121 | |||
122 | static const int grsel_shift[8] = { | ||
123 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
124 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
125 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
126 | }; | ||
127 | |||
128 | /* Masks and values for using events from the various units */ | ||
129 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
130 | [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, | ||
131 | [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, | ||
132 | [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, | ||
133 | [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, | ||
134 | [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, | ||
135 | [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, | ||
136 | }; | ||
137 | |||
138 | static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
139 | { | ||
140 | int pmc, byte, unit, sh; | ||
141 | int bit, fmask; | ||
142 | u64 mask = 0, value = 0; | ||
143 | |||
144 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
145 | if (pmc) { | ||
146 | if (pmc > 6) | ||
147 | return -1; | ||
148 | sh = (pmc - 1) * 2; | ||
149 | mask |= 2 << sh; | ||
150 | value |= 1 << sh; | ||
151 | if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) | ||
152 | return -1; | ||
153 | } | ||
154 | if (event & PM_BUSEVENT_MSK) { | ||
155 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
156 | if (unit > PM_LASTUNIT) | ||
157 | return -1; | ||
158 | if (unit == PM_ISU0_ALT) | ||
159 | unit = PM_ISU0; | ||
160 | mask |= unit_cons[unit][0]; | ||
161 | value |= unit_cons[unit][1]; | ||
162 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
163 | if (byte >= 4) { | ||
164 | if (unit != PM_LSU1) | ||
165 | return -1; | ||
166 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
167 | ++unit; | ||
168 | byte &= 3; | ||
169 | } | ||
170 | if (unit == PM_GRS) { | ||
171 | bit = event & 7; | ||
172 | fmask = (bit == 6)? 7: 3; | ||
173 | sh = grsel_shift[bit]; | ||
174 | mask |= (u64)fmask << sh; | ||
175 | value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; | ||
176 | } | ||
177 | /* Set byte lane select field */ | ||
178 | mask |= 0xfULL << (24 - 4 * byte); | ||
179 | value |= (u64)unit << (24 - 4 * byte); | ||
180 | } | ||
181 | if (pmc < 5) { | ||
182 | /* need a counter from PMC1-4 set */ | ||
183 | mask |= 0x8000000000000ull; | ||
184 | value |= 0x1000000000000ull; | ||
185 | } | ||
186 | *maskp = mask; | ||
187 | *valp = value; | ||
188 | return 0; | ||
189 | } | ||
190 | |||
191 | static int power5p_limited_pmc_event(u64 event) | ||
192 | { | ||
193 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
194 | |||
195 | return pmc == 5 || pmc == 6; | ||
196 | } | ||
197 | |||
/* No event has more than this many alternative encodings. */
#define MAX_ALT	3

/*
 * Groups of event codes that are alternatives for one another.
 * Unused trailing slots are zero.  find_alternative() stops at the
 * first row whose leading entry exceeds the event being looked up, so
 * rows must stay in ascending order of their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	/* PM_GCT_FULL_CYC */
	{ 0x100c0,  0x40001f },
	/* PM_GRP_DISP_REJECT */
	{ 0x120e4,  0x400002 },
	/* PM_BR_PRED_CR */
	{ 0x230e2,  0x323087 },
	/* PM_BR_PRED_TA */
	{ 0x230e3,  0x223087, 0x3230a0 },
	/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x410c7,  0x441084 },
	/* PM_DTLB_MISS */
	{ 0x800c4,  0xc20e0 },
	/* PM_MRK_DTLB_MISS */
	{ 0xc50c6,  0xc60e0 },
	/* PM_RUN_CYC */
	{ 0x100005, 0x600005 },
	/* PM_INST_CMPL */
	{ 0x100009, 0x200009 },
	/* PM_LSU_LMQ_SRQ_EMPTY_CYC */
	{ 0x200015, 0x300015 },
	/* PM_INST_DISP */
	{ 0x300009, 0x400009 },
};
213 | |||
214 | /* | ||
215 | * Scan the alternatives table for a match and return the | ||
216 | * index into the alternatives table if found, else -1. | ||
217 | */ | ||
218 | static int find_alternative(unsigned int event) | ||
219 | { | ||
220 | int i, j; | ||
221 | |||
222 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
223 | if (event < event_alternatives[i][0]) | ||
224 | break; | ||
225 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
226 | if (event == event_alternatives[i][j]) | ||
227 | return i; | ||
228 | } | ||
229 | return -1; | ||
230 | } | ||
231 | |||
/*
 * PMCSEL values for byte-decode events, one row per counter
 * (row 0 is PMC1).  Entry j of one row and entry j of the row for the
 * partner counter (1 <-> 4, 2 <-> 3) are alternatives of each other.
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC 1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC 3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 4 */
};
238 | |||
239 | /* | ||
240 | * Some direct events for decodes of event bus byte 3 have alternative | ||
241 | * PMCSEL values on other counters. This returns the alternative | ||
242 | * event code for those that do, or -1 otherwise. This also handles | ||
243 | * alternative PCMSEL values for add events. | ||
244 | */ | ||
245 | static s64 find_alternative_bdecode(u64 event) | ||
246 | { | ||
247 | int pmc, altpmc, pp, j; | ||
248 | |||
249 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
250 | if (pmc == 0 || pmc > 4) | ||
251 | return -1; | ||
252 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
253 | pp = event & PM_PMCSEL_MSK; | ||
254 | for (j = 0; j < 4; ++j) { | ||
255 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
256 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
257 | (altpmc << PM_PMC_SH) | | ||
258 | bytedecode_alternatives[altpmc - 1][j]; | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /* new decode alternatives for power5+ */ | ||
263 | if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) | ||
264 | return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); | ||
265 | if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) | ||
266 | return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); | ||
267 | |||
268 | /* alternative add event encodings */ | ||
269 | if (pp == 0x10 || pp == 0x28) | ||
270 | return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | | ||
271 | (altpmc << PM_PMC_SH); | ||
272 | |||
273 | return -1; | ||
274 | } | ||
275 | |||
276 | static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
277 | { | ||
278 | int i, j, nalt = 1; | ||
279 | int nlim; | ||
280 | s64 ae; | ||
281 | |||
282 | alt[0] = event; | ||
283 | nalt = 1; | ||
284 | nlim = power5p_limited_pmc_event(event); | ||
285 | i = find_alternative(event); | ||
286 | if (i >= 0) { | ||
287 | for (j = 0; j < MAX_ALT; ++j) { | ||
288 | ae = event_alternatives[i][j]; | ||
289 | if (ae && ae != event) | ||
290 | alt[nalt++] = ae; | ||
291 | nlim += power5p_limited_pmc_event(ae); | ||
292 | } | ||
293 | } else { | ||
294 | ae = find_alternative_bdecode(event); | ||
295 | if (ae > 0) | ||
296 | alt[nalt++] = ae; | ||
297 | } | ||
298 | |||
299 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
300 | /* | ||
301 | * We're only counting in RUN state, | ||
302 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
303 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
304 | * This doesn't include alternatives that don't provide | ||
305 | * any extra flexibility in assigning PMCs (e.g. | ||
306 | * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). | ||
307 | * Note that even with these additional alternatives | ||
308 | * we never end up with more than 3 alternatives for any event. | ||
309 | */ | ||
310 | j = nalt; | ||
311 | for (i = 0; i < nalt; ++i) { | ||
312 | switch (alt[i]) { | ||
313 | case 0xf: /* PM_CYC */ | ||
314 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
315 | ++nlim; | ||
316 | break; | ||
317 | case 0x600005: /* PM_RUN_CYC */ | ||
318 | alt[j++] = 0xf; | ||
319 | break; | ||
320 | case 0x100009: /* PM_INST_CMPL */ | ||
321 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
322 | ++nlim; | ||
323 | break; | ||
324 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
325 | alt[j++] = 0x100009; /* PM_INST_CMPL */ | ||
326 | alt[j++] = 0x200009; | ||
327 | break; | ||
328 | } | ||
329 | } | ||
330 | nalt = j; | ||
331 | } | ||
332 | |||
333 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
334 | /* remove the limited PMC events */ | ||
335 | j = 0; | ||
336 | for (i = 0; i < nalt; ++i) { | ||
337 | if (!power5p_limited_pmc_event(alt[i])) { | ||
338 | alt[j] = alt[i]; | ||
339 | ++j; | ||
340 | } | ||
341 | } | ||
342 | nalt = j; | ||
343 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
344 | /* remove all but the limited PMC events */ | ||
345 | j = 0; | ||
346 | for (i = 0; i < nalt; ++i) { | ||
347 | if (power5p_limited_pmc_event(alt[i])) { | ||
348 | alt[j] = alt[i]; | ||
349 | ++j; | ||
350 | } | ||
351 | } | ||
352 | nalt = j; | ||
353 | } | ||
354 | |||
355 | return nalt; | ||
356 | } | ||
357 | |||
/*
 * Which direct events are marked-instruction events, indexed by
 * PMCSEL value.  Bit i (little-endian numbering) is set if the event
 * is a marked event on PMC i; bit 0 is set if it is marked on every
 * PMC.  The 0x80 bit flags a byte decode PMCSEL value.
 * PMCSEL values not listed here are zero.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
400 | |||
401 | /* | ||
402 | * Returns 1 if event counts things relating to marked instructions | ||
403 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
404 | */ | ||
405 | static int power5p_marked_instr_event(u64 event) | ||
406 | { | ||
407 | int pmc, psel; | ||
408 | int bit, byte, unit; | ||
409 | u32 mask; | ||
410 | |||
411 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
412 | psel = event & PM_PMCSEL_MSK; | ||
413 | if (pmc >= 5) | ||
414 | return 0; | ||
415 | |||
416 | bit = -1; | ||
417 | if (psel < sizeof(direct_event_is_marked)) { | ||
418 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
419 | return 1; | ||
420 | if (direct_event_is_marked[psel] & 0x80) | ||
421 | bit = 4; | ||
422 | else if (psel == 0x08) | ||
423 | bit = pmc - 1; | ||
424 | else if (psel == 0x10) | ||
425 | bit = 4 - pmc; | ||
426 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
427 | bit = 4; | ||
428 | } else if ((psel & 0x48) == 0x40) { | ||
429 | bit = psel & 7; | ||
430 | } else if (psel == 0x28) { | ||
431 | bit = pmc - 1; | ||
432 | } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { | ||
433 | bit = 4; | ||
434 | } | ||
435 | |||
436 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
437 | return 0; | ||
438 | |||
439 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
440 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
441 | if (unit == PM_LSU0) { | ||
442 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
443 | mask = 0x5dff00; | ||
444 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
445 | byte -= 4; | ||
446 | /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ | ||
447 | mask = 0x5f11c000; | ||
448 | } else | ||
449 | return 0; | ||
450 | |||
451 | return (mask >> (byte * 8 + bit)) & 1; | ||
452 | } | ||
453 | |||
454 | static int power5p_compute_mmcr(u64 event[], int n_ev, | ||
455 | unsigned int hwc[], u64 mmcr[]) | ||
456 | { | ||
457 | u64 mmcr1 = 0; | ||
458 | u64 mmcra = 0; | ||
459 | unsigned int pmc, unit, byte, psel; | ||
460 | unsigned int ttm; | ||
461 | int i, isbus, bit, grsel; | ||
462 | unsigned int pmc_inuse = 0; | ||
463 | unsigned char busbyte[4]; | ||
464 | unsigned char unituse[16]; | ||
465 | int ttmuse; | ||
466 | |||
467 | if (n_ev > 6) | ||
468 | return -1; | ||
469 | |||
470 | /* First pass to count resource use */ | ||
471 | memset(busbyte, 0, sizeof(busbyte)); | ||
472 | memset(unituse, 0, sizeof(unituse)); | ||
473 | for (i = 0; i < n_ev; ++i) { | ||
474 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
475 | if (pmc) { | ||
476 | if (pmc > 6) | ||
477 | return -1; | ||
478 | if (pmc_inuse & (1 << (pmc - 1))) | ||
479 | return -1; | ||
480 | pmc_inuse |= 1 << (pmc - 1); | ||
481 | } | ||
482 | if (event[i] & PM_BUSEVENT_MSK) { | ||
483 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
484 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
485 | if (unit > PM_LASTUNIT) | ||
486 | return -1; | ||
487 | if (unit == PM_ISU0_ALT) | ||
488 | unit = PM_ISU0; | ||
489 | if (byte >= 4) { | ||
490 | if (unit != PM_LSU1) | ||
491 | return -1; | ||
492 | ++unit; | ||
493 | byte &= 3; | ||
494 | } | ||
495 | if (busbyte[byte] && busbyte[byte] != unit) | ||
496 | return -1; | ||
497 | busbyte[byte] = unit; | ||
498 | unituse[unit] = 1; | ||
499 | } | ||
500 | } | ||
501 | |||
502 | /* | ||
503 | * Assign resources and set multiplexer selects. | ||
504 | * | ||
505 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
506 | * choice we have to deal with. | ||
507 | */ | ||
508 | if (unituse[PM_ISU0] & | ||
509 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
510 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
511 | unituse[PM_ISU0] = 0; | ||
512 | } | ||
513 | /* Set TTM[01]SEL fields. */ | ||
514 | ttmuse = 0; | ||
515 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
516 | if (!unituse[i]) | ||
517 | continue; | ||
518 | if (ttmuse++) | ||
519 | return -1; | ||
520 | mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; | ||
521 | } | ||
522 | ttmuse = 0; | ||
523 | for (; i <= PM_GRS; ++i) { | ||
524 | if (!unituse[i]) | ||
525 | continue; | ||
526 | if (ttmuse++) | ||
527 | return -1; | ||
528 | mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; | ||
529 | } | ||
530 | if (ttmuse > 1) | ||
531 | return -1; | ||
532 | |||
533 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
534 | for (byte = 0; byte < 4; ++byte) { | ||
535 | unit = busbyte[byte]; | ||
536 | if (!unit) | ||
537 | continue; | ||
538 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
539 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
540 | unit = PM_ISU0_ALT; | ||
541 | } else if (unit == PM_LSU1 + 1) { | ||
542 | /* select lower word of LSU1 for this byte */ | ||
543 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
544 | } | ||
545 | ttm = unit >> 2; | ||
546 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
547 | } | ||
548 | |||
549 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
550 | for (i = 0; i < n_ev; ++i) { | ||
551 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
552 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
553 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
554 | psel = event[i] & PM_PMCSEL_MSK; | ||
555 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
556 | if (!pmc) { | ||
557 | /* Bus event or any-PMC direct event */ | ||
558 | for (pmc = 0; pmc < 4; ++pmc) { | ||
559 | if (!(pmc_inuse & (1 << pmc))) | ||
560 | break; | ||
561 | } | ||
562 | if (pmc >= 4) | ||
563 | return -1; | ||
564 | pmc_inuse |= 1 << pmc; | ||
565 | } else if (pmc <= 4) { | ||
566 | /* Direct event */ | ||
567 | --pmc; | ||
568 | if (isbus && (byte & 2) && | ||
569 | (psel == 8 || psel == 0x10 || psel == 0x28)) | ||
570 | /* add events on higher-numbered bus */ | ||
571 | mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
572 | } else { | ||
573 | /* Instructions or run cycles on PMC5/6 */ | ||
574 | --pmc; | ||
575 | } | ||
576 | if (isbus && unit == PM_GRS) { | ||
577 | bit = psel & 7; | ||
578 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
579 | mmcr1 |= (u64)grsel << grsel_shift[bit]; | ||
580 | } | ||
581 | if (power5p_marked_instr_event(event[i])) | ||
582 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
583 | if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) | ||
584 | /* select alternate byte lane */ | ||
585 | psel |= 0x10; | ||
586 | if (pmc <= 3) | ||
587 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
588 | hwc[i] = pmc; | ||
589 | } | ||
590 | |||
591 | /* Return MMCRx values */ | ||
592 | mmcr[0] = 0; | ||
593 | if (pmc_inuse & 1) | ||
594 | mmcr[0] = MMCR0_PMC1CE; | ||
595 | if (pmc_inuse & 0x3e) | ||
596 | mmcr[0] |= MMCR0_PMCjCE; | ||
597 | mmcr[1] = mmcr1; | ||
598 | mmcr[2] = mmcra; | ||
599 | return 0; | ||
600 | } | ||
601 | |||
602 | static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
603 | { | ||
604 | if (pmc <= 3) | ||
605 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
606 | } | ||
607 | |||
608 | static int power5p_generic_events[] = { | ||
609 | [PERF_COUNT_HW_CPU_CYCLES] = 0xf, | ||
610 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, | ||
611 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ | ||
612 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ | ||
613 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ | ||
614 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ | ||
615 | }; | ||
616 | |||
617 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
618 | |||
619 | /* | ||
620 | * Table of generalized cache-related events. | ||
621 | * 0 means not supported, -1 means nonsensical, other values | ||
622 | * are event codes. | ||
623 | */ | ||
624 | static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
625 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
626 | [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, | ||
627 | [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, | ||
628 | [C(OP_PREFETCH)] = { 0xc70e7, -1 }, | ||
629 | }, | ||
630 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
631 | [C(OP_READ)] = { 0, 0 }, | ||
632 | [C(OP_WRITE)] = { -1, -1 }, | ||
633 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
634 | }, | ||
635 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
636 | [C(OP_READ)] = { 0, 0 }, | ||
637 | [C(OP_WRITE)] = { 0, 0 }, | ||
638 | [C(OP_PREFETCH)] = { 0xc50c3, 0 }, | ||
639 | }, | ||
640 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
641 | [C(OP_READ)] = { 0xc20e4, 0x800c4 }, | ||
642 | [C(OP_WRITE)] = { -1, -1 }, | ||
643 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
644 | }, | ||
645 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
646 | [C(OP_READ)] = { 0, 0x800c0 }, | ||
647 | [C(OP_WRITE)] = { -1, -1 }, | ||
648 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
649 | }, | ||
650 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
651 | [C(OP_READ)] = { 0x230e4, 0x230e5 }, | ||
652 | [C(OP_WRITE)] = { -1, -1 }, | ||
653 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
654 | }, | ||
655 | }; | ||
656 | |||
657 | struct power_pmu power5p_pmu = { | ||
658 | .n_counter = 6, | ||
659 | .max_alternatives = MAX_ALT, | ||
660 | .add_fields = 0x7000000000055ull, | ||
661 | .test_adder = 0x3000040000000ull, | ||
662 | .compute_mmcr = power5p_compute_mmcr, | ||
663 | .get_constraint = power5p_get_constraint, | ||
664 | .get_alternatives = power5p_get_alternatives, | ||
665 | .disable_pmc = power5p_disable_pmc, | ||
666 | .limited_pmc_event = power5p_limited_pmc_event, | ||
667 | .flags = PPMU_LIMITED_PMC5_6, | ||
668 | .n_generic = ARRAY_SIZE(power5p_generic_events), | ||
669 | .generic_events = power5p_generic_events, | ||
670 | .cache_events = &power5p_cache_events, | ||
671 | }; | ||
diff --git a/arch/powerpc/kernel/power5-pmu.c b/arch/powerpc/kernel/power5-pmu.c new file mode 100644 index 000000000000..05600b66221a --- /dev/null +++ b/arch/powerpc/kernel/power5-pmu.c | |||
@@ -0,0 +1,611 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER5 (not POWER5++) processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
15 | /* | ||
16 | * Bits in event code for POWER5 (not POWER5++) | ||
17 | */ | ||
18 | #define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ | ||
19 | #define PM_PMC_MSK 0xf | ||
20 | #define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) | ||
21 | #define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ | ||
22 | #define PM_UNIT_MSK 0xf | ||
23 | #define PM_BYTE_SH 12 /* Byte number of event bus to use */ | ||
24 | #define PM_BYTE_MSK 7 | ||
25 | #define PM_GRS_SH 8 /* Storage subsystem mux select */ | ||
26 | #define PM_GRS_MSK 7 | ||
27 | #define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ | ||
28 | #define PM_PMCSEL_MSK 0x7f | ||
29 | |||
30 | /* Values in PM_UNIT field */ | ||
31 | #define PM_FPU 0 | ||
32 | #define PM_ISU0 1 | ||
33 | #define PM_IFU 2 | ||
34 | #define PM_ISU1 3 | ||
35 | #define PM_IDU 4 | ||
36 | #define PM_ISU0_ALT 6 | ||
37 | #define PM_GRS 7 | ||
38 | #define PM_LSU0 8 | ||
39 | #define PM_LSU1 0xc | ||
40 | #define PM_LASTUNIT 0xc | ||
41 | |||
42 | /* | ||
43 | * Bits in MMCR1 for POWER5 | ||
44 | */ | ||
45 | #define MMCR1_TTM0SEL_SH 62 | ||
46 | #define MMCR1_TTM1SEL_SH 60 | ||
47 | #define MMCR1_TTM2SEL_SH 58 | ||
48 | #define MMCR1_TTM3SEL_SH 56 | ||
49 | #define MMCR1_TTMSEL_MSK 3 | ||
50 | #define MMCR1_TD_CP_DBG0SEL_SH 54 | ||
51 | #define MMCR1_TD_CP_DBG1SEL_SH 52 | ||
52 | #define MMCR1_TD_CP_DBG2SEL_SH 50 | ||
53 | #define MMCR1_TD_CP_DBG3SEL_SH 48 | ||
54 | #define MMCR1_GRS_L2SEL_SH 46 | ||
55 | #define MMCR1_GRS_L2SEL_MSK 3 | ||
56 | #define MMCR1_GRS_L3SEL_SH 44 | ||
57 | #define MMCR1_GRS_L3SEL_MSK 3 | ||
58 | #define MMCR1_GRS_MCSEL_SH 41 | ||
59 | #define MMCR1_GRS_MCSEL_MSK 7 | ||
60 | #define MMCR1_GRS_FABSEL_SH 39 | ||
61 | #define MMCR1_GRS_FABSEL_MSK 3 | ||
62 | #define MMCR1_PMC1_ADDER_SEL_SH 35 | ||
63 | #define MMCR1_PMC2_ADDER_SEL_SH 34 | ||
64 | #define MMCR1_PMC3_ADDER_SEL_SH 33 | ||
65 | #define MMCR1_PMC4_ADDER_SEL_SH 32 | ||
66 | #define MMCR1_PMC1SEL_SH 25 | ||
67 | #define MMCR1_PMC2SEL_SH 17 | ||
68 | #define MMCR1_PMC3SEL_SH 9 | ||
69 | #define MMCR1_PMC4SEL_SH 1 | ||
70 | #define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) | ||
71 | #define MMCR1_PMCSEL_MSK 0x7f | ||
72 | |||
73 | /* | ||
74 | * Bits in MMCRA | ||
75 | */ | ||
76 | |||
77 | /* | ||
78 | * Layout of constraint bits: | ||
79 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
80 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
81 | * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> | ||
82 | * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 | ||
83 | * | ||
84 | * T0 - TTM0 constraint | ||
85 | * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 | ||
86 | * | ||
87 | * T1 - TTM1 constraint | ||
88 | * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 | ||
89 | * | ||
90 | * NC - number of counters | ||
91 | * 51: NC error 0x0008_0000_0000_0000 | ||
92 | * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 | ||
93 | * | ||
94 | * G0..G3 - GRS mux constraints | ||
95 | * 46-47: GRS_L2SEL value | ||
96 | * 44-45: GRS_L3SEL value | ||
97 | * 41-43: GRS_MCSEL value | ||
98 | * 39-40: GRS_FABSEL value | ||
99 | * Note that these match up with their bit positions in MMCR1 | ||
100 | * | ||
101 | * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS | ||
102 | * 37: UC3 error 0x20_0000_0000 | ||
103 | * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 | ||
104 | * 35: ISU0 events needed 0x08_0000_0000 | ||
105 | * 34: IDU|GRS events needed 0x04_0000_0000 | ||
106 | * | ||
107 | * PS1 | ||
108 | * 33: PS1 error 0x2_0000_0000 | ||
109 | * 31-32: count of events needing PMC1/2 0x1_8000_0000 | ||
110 | * | ||
111 | * PS2 | ||
112 | * 30: PS2 error 0x4000_0000 | ||
113 | * 28-29: count of events needing PMC3/4 0x3000_0000 | ||
114 | * | ||
115 | * B0 | ||
116 | * 24-27: Byte 0 event source 0x0f00_0000 | ||
117 | * Encoding as for the event code | ||
118 | * | ||
119 | * B1, B2, B3 | ||
120 | * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources | ||
121 | * | ||
122 | * P1..P6 | ||
123 | * 0-11: Count of events needing PMC1..PMC6 | ||
124 | */ | ||
125 | |||
126 | static const int grsel_shift[8] = { | ||
127 | MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, | ||
128 | MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, | ||
129 | MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH | ||
130 | }; | ||
131 | |||
132 | /* Masks and values for using events from the various units */ | ||
133 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
134 | [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, | ||
135 | [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, | ||
136 | [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, | ||
137 | [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, | ||
138 | [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, | ||
139 | [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, | ||
140 | }; | ||
141 | |||
142 | static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
143 | { | ||
144 | int pmc, byte, unit, sh; | ||
145 | int bit, fmask; | ||
146 | u64 mask = 0, value = 0; | ||
147 | int grp = -1; | ||
148 | |||
149 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
150 | if (pmc) { | ||
151 | if (pmc > 6) | ||
152 | return -1; | ||
153 | sh = (pmc - 1) * 2; | ||
154 | mask |= 2 << sh; | ||
155 | value |= 1 << sh; | ||
156 | if (pmc <= 4) | ||
157 | grp = (pmc - 1) >> 1; | ||
158 | else if (event != 0x500009 && event != 0x600005) | ||
159 | return -1; | ||
160 | } | ||
161 | if (event & PM_BUSEVENT_MSK) { | ||
162 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
163 | if (unit > PM_LASTUNIT) | ||
164 | return -1; | ||
165 | if (unit == PM_ISU0_ALT) | ||
166 | unit = PM_ISU0; | ||
167 | mask |= unit_cons[unit][0]; | ||
168 | value |= unit_cons[unit][1]; | ||
169 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
170 | if (byte >= 4) { | ||
171 | if (unit != PM_LSU1) | ||
172 | return -1; | ||
173 | /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ | ||
174 | ++unit; | ||
175 | byte &= 3; | ||
176 | } | ||
177 | if (unit == PM_GRS) { | ||
178 | bit = event & 7; | ||
179 | fmask = (bit == 6)? 7: 3; | ||
180 | sh = grsel_shift[bit]; | ||
181 | mask |= (u64)fmask << sh; | ||
182 | value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; | ||
183 | } | ||
184 | /* | ||
185 | * Bus events on bytes 0 and 2 can be counted | ||
186 | * on PMC1/2; bytes 1 and 3 on PMC3/4. | ||
187 | */ | ||
188 | if (!pmc) | ||
189 | grp = byte & 1; | ||
190 | /* Set byte lane select field */ | ||
191 | mask |= 0xfULL << (24 - 4 * byte); | ||
192 | value |= (u64)unit << (24 - 4 * byte); | ||
193 | } | ||
194 | if (grp == 0) { | ||
195 | /* increment PMC1/2 field */ | ||
196 | mask |= 0x200000000ull; | ||
197 | value |= 0x080000000ull; | ||
198 | } else if (grp == 1) { | ||
199 | /* increment PMC3/4 field */ | ||
200 | mask |= 0x40000000ull; | ||
201 | value |= 0x10000000ull; | ||
202 | } | ||
203 | if (pmc < 5) { | ||
204 | /* need a counter from PMC1-4 set */ | ||
205 | mask |= 0x8000000000000ull; | ||
206 | value |= 0x1000000000000ull; | ||
207 | } | ||
208 | *maskp = mask; | ||
209 | *valp = value; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
#define MAX_ALT	3	/* at most 3 alternatives for any event */

/*
 * Each row lists event codes that are alternatives for one another;
 * unused trailing slots are 0.  find_alternative() stops at the first
 * row whose leading code is greater than the event it is looking for,
 * so rows must stay in ascending order of their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x120e4,  0x400002, 0        },	/* PM_GRP_DISP_REJECT */
	{ 0x410c7,  0x441084, 0        },	/* PM_THRD_L2MISS_BOTH_CYC */
	{ 0x100005, 0x600005, 0        },	/* PM_RUN_CYC */
	{ 0x100009, 0x200009, 0x500009 },	/* PM_INST_CMPL */
	{ 0x300009, 0x400009, 0        },	/* PM_INST_DISP */
};
222 | |||
223 | /* | ||
224 | * Scan the alternatives table for a match and return the | ||
225 | * index into the alternatives table if found, else -1. | ||
226 | */ | ||
227 | static int find_alternative(u64 event) | ||
228 | { | ||
229 | int i, j; | ||
230 | |||
231 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
232 | if (event < event_alternatives[i][0]) | ||
233 | break; | ||
234 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
235 | if (event == event_alternatives[i][j]) | ||
236 | return i; | ||
237 | } | ||
238 | return -1; | ||
239 | } | ||
240 | |||
/*
 * PMCSEL values indexed by [PMC number - 1][column]; entries in the
 * same column of two rows are paired up by find_alternative_bdecode().
 */
static const unsigned char bytedecode_alternatives[4][4] = {
	[0] = { 0x21, 0x23, 0x25, 0x27 },	/* PMC 1 */
	[1] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 2 */
	[2] = { 0x20, 0x22, 0x24, 0x26 },	/* PMC 3 */
	[3] = { 0x07, 0x17, 0x0e, 0x1e },	/* PMC 4 */
};
247 | |||
248 | /* | ||
249 | * Some direct events for decodes of event bus byte 3 have alternative | ||
250 | * PMCSEL values on other counters. This returns the alternative | ||
251 | * event code for those that do, or -1 otherwise. | ||
252 | */ | ||
253 | static s64 find_alternative_bdecode(u64 event) | ||
254 | { | ||
255 | int pmc, altpmc, pp, j; | ||
256 | |||
257 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
258 | if (pmc == 0 || pmc > 4) | ||
259 | return -1; | ||
260 | altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ | ||
261 | pp = event & PM_PMCSEL_MSK; | ||
262 | for (j = 0; j < 4; ++j) { | ||
263 | if (bytedecode_alternatives[pmc - 1][j] == pp) { | ||
264 | return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | | ||
265 | (altpmc << PM_PMC_SH) | | ||
266 | bytedecode_alternatives[altpmc - 1][j]; | ||
267 | } | ||
268 | } | ||
269 | return -1; | ||
270 | } | ||
271 | |||
272 | static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
273 | { | ||
274 | int i, j, nalt = 1; | ||
275 | s64 ae; | ||
276 | |||
277 | alt[0] = event; | ||
278 | nalt = 1; | ||
279 | i = find_alternative(event); | ||
280 | if (i >= 0) { | ||
281 | for (j = 0; j < MAX_ALT; ++j) { | ||
282 | ae = event_alternatives[i][j]; | ||
283 | if (ae && ae != event) | ||
284 | alt[nalt++] = ae; | ||
285 | } | ||
286 | } else { | ||
287 | ae = find_alternative_bdecode(event); | ||
288 | if (ae > 0) | ||
289 | alt[nalt++] = ae; | ||
290 | } | ||
291 | return nalt; | ||
292 | } | ||
293 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event.
 * Bit 0 is set if it is marked for all PMCs.
 * The 0x80 bit indicates a byte decode PMCSEL value.
 * PMCSEL values not listed below are 0.
 */
static unsigned char direct_event_is_marked[0x28] = {
	[0x01] = 0x1f,	/* PM_IOPS_CMPL */
	[0x02] = 0x2,	/* PM_MRK_GRP_DISP */
	[0x03] = 0xe,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x05] = 0x1c,	/* PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */
	[0x06] = 0x80,
	[0x07] = 0x80,
	[0x0b] = 0x18,	/* PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */
	[0x0d] = 0x80,
	[0x0e] = 0x80,
	[0x11] = 0x14,	/* PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */
	[0x13] = 0x10,	/* PM_MRK_GRP_CMPL */
	[0x14] = 0x1f,	/* PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x15] = 0x2,	/* PM_MRK_GRP_ISSUED */
	[0x16] = 0x80,
	[0x17] = 0x80,
	[0x1d] = 0x80,
	[0x1e] = 0x80,
	[0x20] = 0x80,
	[0x21] = 0x80,
	[0x22] = 0x80,
	[0x23] = 0x80,
	[0x24] = 0x80,
	[0x25] = 0x80,
	[0x26] = 0x80,
	[0x27] = 0x80,
};
336 | |||
337 | /* | ||
338 | * Returns 1 if event counts things relating to marked instructions | ||
339 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
340 | */ | ||
341 | static int power5_marked_instr_event(u64 event) | ||
342 | { | ||
343 | int pmc, psel; | ||
344 | int bit, byte, unit; | ||
345 | u32 mask; | ||
346 | |||
347 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
348 | psel = event & PM_PMCSEL_MSK; | ||
349 | if (pmc >= 5) | ||
350 | return 0; | ||
351 | |||
352 | bit = -1; | ||
353 | if (psel < sizeof(direct_event_is_marked)) { | ||
354 | if (direct_event_is_marked[psel] & (1 << pmc)) | ||
355 | return 1; | ||
356 | if (direct_event_is_marked[psel] & 0x80) | ||
357 | bit = 4; | ||
358 | else if (psel == 0x08) | ||
359 | bit = pmc - 1; | ||
360 | else if (psel == 0x10) | ||
361 | bit = 4 - pmc; | ||
362 | else if (psel == 0x1b && (pmc == 1 || pmc == 3)) | ||
363 | bit = 4; | ||
364 | } else if ((psel & 0x58) == 0x40) | ||
365 | bit = psel & 7; | ||
366 | |||
367 | if (!(event & PM_BUSEVENT_MSK)) | ||
368 | return 0; | ||
369 | |||
370 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
371 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
372 | if (unit == PM_LSU0) { | ||
373 | /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ | ||
374 | mask = 0x5dff00; | ||
375 | } else if (unit == PM_LSU1 && byte >= 4) { | ||
376 | byte -= 4; | ||
377 | /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ | ||
378 | mask = 0x5f00c0aa; | ||
379 | } else | ||
380 | return 0; | ||
381 | |||
382 | return (mask >> (byte * 8 + bit)) & 1; | ||
383 | } | ||
384 | |||
385 | static int power5_compute_mmcr(u64 event[], int n_ev, | ||
386 | unsigned int hwc[], u64 mmcr[]) | ||
387 | { | ||
388 | u64 mmcr1 = 0; | ||
389 | u64 mmcra = 0; | ||
390 | unsigned int pmc, unit, byte, psel; | ||
391 | unsigned int ttm, grp; | ||
392 | int i, isbus, bit, grsel; | ||
393 | unsigned int pmc_inuse = 0; | ||
394 | unsigned int pmc_grp_use[2]; | ||
395 | unsigned char busbyte[4]; | ||
396 | unsigned char unituse[16]; | ||
397 | int ttmuse; | ||
398 | |||
399 | if (n_ev > 6) | ||
400 | return -1; | ||
401 | |||
402 | /* First pass to count resource use */ | ||
403 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
404 | memset(busbyte, 0, sizeof(busbyte)); | ||
405 | memset(unituse, 0, sizeof(unituse)); | ||
406 | for (i = 0; i < n_ev; ++i) { | ||
407 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
408 | if (pmc) { | ||
409 | if (pmc > 6) | ||
410 | return -1; | ||
411 | if (pmc_inuse & (1 << (pmc - 1))) | ||
412 | return -1; | ||
413 | pmc_inuse |= 1 << (pmc - 1); | ||
414 | /* count 1/2 vs 3/4 use */ | ||
415 | if (pmc <= 4) | ||
416 | ++pmc_grp_use[(pmc - 1) >> 1]; | ||
417 | } | ||
418 | if (event[i] & PM_BUSEVENT_MSK) { | ||
419 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
420 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
421 | if (unit > PM_LASTUNIT) | ||
422 | return -1; | ||
423 | if (unit == PM_ISU0_ALT) | ||
424 | unit = PM_ISU0; | ||
425 | if (byte >= 4) { | ||
426 | if (unit != PM_LSU1) | ||
427 | return -1; | ||
428 | ++unit; | ||
429 | byte &= 3; | ||
430 | } | ||
431 | if (!pmc) | ||
432 | ++pmc_grp_use[byte & 1]; | ||
433 | if (busbyte[byte] && busbyte[byte] != unit) | ||
434 | return -1; | ||
435 | busbyte[byte] = unit; | ||
436 | unituse[unit] = 1; | ||
437 | } | ||
438 | } | ||
439 | if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) | ||
440 | return -1; | ||
441 | |||
442 | /* | ||
443 | * Assign resources and set multiplexer selects. | ||
444 | * | ||
445 | * PM_ISU0 can go either on TTM0 or TTM1, but that's the only | ||
446 | * choice we have to deal with. | ||
447 | */ | ||
448 | if (unituse[PM_ISU0] & | ||
449 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { | ||
450 | unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ | ||
451 | unituse[PM_ISU0] = 0; | ||
452 | } | ||
453 | /* Set TTM[01]SEL fields. */ | ||
454 | ttmuse = 0; | ||
455 | for (i = PM_FPU; i <= PM_ISU1; ++i) { | ||
456 | if (!unituse[i]) | ||
457 | continue; | ||
458 | if (ttmuse++) | ||
459 | return -1; | ||
460 | mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; | ||
461 | } | ||
462 | ttmuse = 0; | ||
463 | for (; i <= PM_GRS; ++i) { | ||
464 | if (!unituse[i]) | ||
465 | continue; | ||
466 | if (ttmuse++) | ||
467 | return -1; | ||
468 | mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; | ||
469 | } | ||
470 | if (ttmuse > 1) | ||
471 | return -1; | ||
472 | |||
473 | /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. */ | ||
474 | for (byte = 0; byte < 4; ++byte) { | ||
475 | unit = busbyte[byte]; | ||
476 | if (!unit) | ||
477 | continue; | ||
478 | if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { | ||
479 | /* get ISU0 through TTM1 rather than TTM0 */ | ||
480 | unit = PM_ISU0_ALT; | ||
481 | } else if (unit == PM_LSU1 + 1) { | ||
482 | /* select lower word of LSU1 for this byte */ | ||
483 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
484 | } | ||
485 | ttm = unit >> 2; | ||
486 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
487 | } | ||
488 | |||
489 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
490 | for (i = 0; i < n_ev; ++i) { | ||
491 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
492 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
493 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
494 | psel = event[i] & PM_PMCSEL_MSK; | ||
495 | isbus = event[i] & PM_BUSEVENT_MSK; | ||
496 | if (!pmc) { | ||
497 | /* Bus event or any-PMC direct event */ | ||
498 | for (pmc = 0; pmc < 4; ++pmc) { | ||
499 | if (pmc_inuse & (1 << pmc)) | ||
500 | continue; | ||
501 | grp = (pmc >> 1) & 1; | ||
502 | if (isbus) { | ||
503 | if (grp == (byte & 1)) | ||
504 | break; | ||
505 | } else if (pmc_grp_use[grp] < 2) { | ||
506 | ++pmc_grp_use[grp]; | ||
507 | break; | ||
508 | } | ||
509 | } | ||
510 | pmc_inuse |= 1 << pmc; | ||
511 | } else if (pmc <= 4) { | ||
512 | /* Direct event */ | ||
513 | --pmc; | ||
514 | if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) | ||
515 | /* add events on higher-numbered bus */ | ||
516 | mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); | ||
517 | } else { | ||
518 | /* Instructions or run cycles on PMC5/6 */ | ||
519 | --pmc; | ||
520 | } | ||
521 | if (isbus && unit == PM_GRS) { | ||
522 | bit = psel & 7; | ||
523 | grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; | ||
524 | mmcr1 |= (u64)grsel << grsel_shift[bit]; | ||
525 | } | ||
526 | if (power5_marked_instr_event(event[i])) | ||
527 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
528 | if (pmc <= 3) | ||
529 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
530 | hwc[i] = pmc; | ||
531 | } | ||
532 | |||
533 | /* Return MMCRx values */ | ||
534 | mmcr[0] = 0; | ||
535 | if (pmc_inuse & 1) | ||
536 | mmcr[0] = MMCR0_PMC1CE; | ||
537 | if (pmc_inuse & 0x3e) | ||
538 | mmcr[0] |= MMCR0_PMCjCE; | ||
539 | mmcr[1] = mmcr1; | ||
540 | mmcr[2] = mmcra; | ||
541 | return 0; | ||
542 | } | ||
543 | |||
544 | static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
545 | { | ||
546 | if (pmc <= 3) | ||
547 | mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); | ||
548 | } | ||
549 | |||
/*
 * POWER5 event codes for the generic hardware events, indexed by
 * PERF_COUNT_HW_* id.  Exported through power5_pmu.generic_events.
 */
static int power5_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0xf,
	[PERF_COUNT_HW_INSTRUCTIONS] = 0x100009,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090,	/* LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088,	/* LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4,	/* BR_ISSUED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5,	/* BR_MPRED_CR */
};
558 | |||
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result].
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 */
static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
		[C(OP_PREFETCH)] = {	0xc70e7,	0		},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	0,		0		},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x3c309b	},
		[C(OP_WRITE)] = {	0,		0		},
		[C(OP_PREFETCH)] = {	0xc50c3,	0		},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x2c4090,	0x800c4		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x800c0		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x230e4,	0x230e5		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
};
598 | |||
/*
 * Description of the POWER5 PMU: number of counters, the callbacks
 * defined in this file and the generic/cache event tables.
 * NOTE(review): add_fields/test_adder are presumably the constants the
 * generic constraint checker uses with get_constraint()'s mask/value
 * pairs -- confirm against the code that consumes struct power_pmu.
 */
struct power_pmu power5_pmu = {
	.n_counter = 6,
	.max_alternatives = MAX_ALT,
	.add_fields = 0x7000090000555ull,
	.test_adder = 0x3000490000000ull,
	.compute_mmcr = power5_compute_mmcr,
	.get_constraint = power5_get_constraint,
	.get_alternatives = power5_get_alternatives,
	.disable_pmc = power5_disable_pmc,
	.n_generic = ARRAY_SIZE(power5_generic_events),
	.generic_events = power5_generic_events,
	.cache_events = &power5_cache_events,
};
diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c new file mode 100644 index 000000000000..46f74bebcfd9 --- /dev/null +++ b/arch/powerpc/kernel/power6-pmu.c | |||
@@ -0,0 +1,532 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER6 processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Bits in event code for POWER6
 *
 * The *_MSK values are unshifted field masks; the *_MSKS variants are
 * the same masks shifted into position.  All of them are plain int
 * constants, so cast to u64 before shifting them past bit 30.
 */
#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0x7
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)
#define PM_UNIT_SH	16	/* Unit event comes from (TTMxSEL encoding) */
#define PM_UNIT_MSK	0xf
#define PM_UNIT_MSKS	(PM_UNIT_MSK << PM_UNIT_SH)
#define PM_LLAV		0x8000	/* Load lookahead match value */
#define PM_LLA		0x4000	/* Load lookahead match enable */
#define PM_BYTE_SH	12	/* Byte of event bus to use */
#define PM_BYTE_MSK	3
#define PM_SUBUNIT_SH	8	/* Subunit event comes from (NEST_SEL enc.) */
#define PM_SUBUNIT_MSK	7
#define PM_SUBUNIT_MSKS	(PM_SUBUNIT_MSK << PM_SUBUNIT_SH)
#define PM_PMCSEL_MSK	0xff	/* PMCxSEL value */
#define PM_BUSEVENT_MSK	0xf3700	/* nonzero in any of these bits: event uses the event bus */

/*
 * Bits in MMCR1 for POWER6
 *
 * In the parameterised macros, n is 0-based: n == 0 addresses the
 * TTM0SEL / PMC1SEL field and each following field sits lower down.
 */
#define MMCR1_TTM0SEL_SH	60
#define MMCR1_TTMSEL_SH(n)	(MMCR1_TTM0SEL_SH - (n) * 4)
#define MMCR1_TTMSEL_MSK	0xf
/* extract the TTMnSEL field from MMCR1 value m */
#define MMCR1_TTMSEL(m, n)	(((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK)
#define MMCR1_NESTSEL_SH	45
#define MMCR1_NESTSEL_MSK	0x7
/* extract the NESTSEL field from MMCR1 value m */
#define MMCR1_NESTSEL(m)	(((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK)
/* per-PMC single-bit flags for PMC1; shift right by n for PMC(n+1) */
#define MMCR1_PMC1_LLA		((u64)1 << 44)
#define MMCR1_PMC1_LLA_VALUE	((u64)1 << 39)
#define MMCR1_PMC1_ADDR_SEL	((u64)1 << 35)
#define MMCR1_PMC1SEL_SH	24
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0xff
50 | |||
/*
 * Map of which direct events on which PMCs are marked instruction events.
 * Indexed by PMCSEL value >> 1.
 * Bottom 4 bits are a map of which PMCs are interesting,
 * top 4 bits say what sort of event:
 *   0 = direct marked event,
 *   1 = byte decode event,
 *   4 = add/and event (PMC1 -> bits 0 & 4),
 *   5 = add/and event (PMC1 -> bits 1 & 5),
 *   6 = add/and event (PMC1 -> bits 2 & 6),
 *   7 = add/and event (PMC1 -> bits 3 & 7).
 * PMCSEL values not listed below are 0.
 */
static unsigned char direct_event_is_marked[0x60 >> 1] = {
	[0x06 >> 1] = 0x07,	/* PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */
	[0x08 >> 1] = 0x04,	/* PM_MRK_DFU_FIN */
	[0x0a >> 1] = 0x06,	/* PM_MRK_IFU_FIN, PM_MRK_INST_FIN */
	[0x10 >> 1] = 0x02,	/* PM_MRK_INST_DISP */
	[0x12 >> 1] = 0x08,	/* PM_MRK_LSU_DERAT_MISS */
	[0x18 >> 1] = 0x0c,	/* PM_THRESH_TIMEO, PM_MRK_INST_FIN */
	[0x1a >> 1] = 0x0f,	/* PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */
	[0x1c >> 1] = 0x01,	/* PM_MRK_INST_ISSUED */
	[0x28 >> 1] = 0x15,	/* PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */
	[0x30 >> 1] = 0x4f,
	[0x32 >> 1] = 0x7f,
	[0x34 >> 1] = 0x4f,
	[0x36 >> 1] = 0x5f,
	[0x38 >> 1] = 0x6f,
	[0x3a >> 1] = 0x4f,
	[0x3e >> 1] = 0x08,	/* PM_MRK_INST_TIMEO */
	[0x40 >> 1] = 0x1f,
	[0x42 >> 1] = 0x1f,
	[0x44 >> 1] = 0x1f,
	[0x46 >> 1] = 0x1f,
	[0x48 >> 1] = 0x1f,
	[0x4a >> 1] = 0x1f,
	[0x4c >> 1] = 0x1f,
	[0x4e >> 1] = 0x1f,
	[0x52 >> 1] = 0x05,	/* PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */
	[0x54 >> 1] = 0x1c,	/* PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */
	[0x56 >> 1] = 0x02,	/* PM_MRK_LD_MISS_L1 */
};
113 | |||
/*
 * Masks showing for each unit which bits are marked events.
 * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0.
 * Indexed by the 4-bit unit field of the event code; bit
 * (byte * 8 + bit) is tested by power6_marked_instr_event().
 */
static u32 marked_bus_events[16] = {
	0x01000000,	/* direct events set 1: byte 3 bit 0 */
	0x00010000,	/* direct events set 2: byte 2 bit 0 */
	0, 0, 0, 0,	/* IDU, IFU, nest: nothing */
	0x00000088,	/* VMX set 1: byte 0 bits 3, 7 */
	/*
	 * NOTE(review): 0xc0 sets only bits 6-7 of byte 0, which does not
	 * match the "bits 4-7" description (that would be 0xf0) -- confirm
	 * which of the two is intended.
	 */
	0x000000c0,	/* VMX set 2: byte 0 bits 4-7 */
	0x04010000,	/* LSU set 1: byte 2 bit 0, byte 3 bit 2 */
	0xff010000u,	/* LSU set 2: byte 2 bit 0, all of byte 3 */
	0,		/* LSU set 3 */
	0x00000010,	/* VMX set 3: byte 0 bit 4 */
	0,		/* BFP set 1 */
	0x00000022,	/* BFP set 2: byte 0 bits 1, 5 */
	0, 0
};
132 | |||
133 | /* | ||
134 | * Returns 1 if event counts things relating to marked instructions | ||
135 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
136 | */ | ||
137 | static int power6_marked_instr_event(u64 event) | ||
138 | { | ||
139 | int pmc, psel, ptype; | ||
140 | int bit, byte, unit; | ||
141 | u32 mask; | ||
142 | |||
143 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
144 | psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ | ||
145 | if (pmc >= 5) | ||
146 | return 0; | ||
147 | |||
148 | bit = -1; | ||
149 | if (psel < sizeof(direct_event_is_marked)) { | ||
150 | ptype = direct_event_is_marked[psel]; | ||
151 | if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) | ||
152 | return 0; | ||
153 | ptype >>= 4; | ||
154 | if (ptype == 0) | ||
155 | return 1; | ||
156 | if (ptype == 1) | ||
157 | bit = 0; | ||
158 | else | ||
159 | bit = ptype ^ (pmc - 1); | ||
160 | } else if ((psel & 0x48) == 0x40) | ||
161 | bit = psel & 7; | ||
162 | |||
163 | if (!(event & PM_BUSEVENT_MSK) || bit == -1) | ||
164 | return 0; | ||
165 | |||
166 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
167 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
168 | mask = marked_bus_events[unit]; | ||
169 | return (mask >> (byte * 8 + bit)) & 1; | ||
170 | } | ||
171 | |||
/*
 * Assign PMC numbers and compute MMCR1 value for a set of events
 *
 * On success returns 0 with hwc[i] set to the 0-based PMC used for
 * event[i], mmcr[0] holding MMCR0_PMC1CE / MMCR0_PMCjCE as needed,
 * mmcr[1] the MMCR1 value and mmcr[2] the MMCRA value.
 * Returns -1 if two events name the same PMC, no PMC1-4 is left for
 * an any-PMC event, or two events need different settings of the same
 * event-bus byte or of the nest selector; mmcr[] is then untouched.
 */
static int p6_compute_mmcr(u64 event[], int n_ev,
			   unsigned int hwc[], u64 mmcr[])
{
	u64 mmcr1 = 0;
	u64 mmcra = 0;
	int i;
	unsigned int pmc, ev, b, u, s, psel;
	unsigned int ttmset = 0;	/* bits 0-3: bus byte b claimed; 0x10: nest select claimed */
	unsigned int pmc_inuse = 0;	/* bit n set: PMC(n+1) is taken */

	if (n_ev > 6)
		return -1;
	/* First pass: record the PMCs that events ask for explicitly */
	for (i = 0; i < n_ev; ++i) {
		pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			if (pmc_inuse & (1 << (pmc - 1)))
				return -1;	/* collision! */
			pmc_inuse |= 1 << (pmc - 1);
		}
	}
	/* Second pass: place the remaining events and build MMCR1/MMCRA */
	for (i = 0; i < n_ev; ++i) {
		ev = event[i];
		pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK;
		if (pmc) {
			--pmc;		/* make it 0-based */
		} else {
			/* can go on any PMC; find a free one */
			for (pmc = 0; pmc < 4; ++pmc)
				if (!(pmc_inuse & (1 << pmc)))
					break;
			if (pmc >= 4)
				return -1;
			pmc_inuse |= 1 << pmc;
		}
		hwc[i] = pmc;
		psel = ev & PM_PMCSEL_MSK;
		if (ev & PM_BUSEVENT_MSK) {
			/* this event uses the event bus */
			b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK;
			u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK;
			/* check for conflict on this byte of event bus */
			if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u)
				return -1;
			mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b);
			ttmset |= 1 << b;
			if (u == 5) {
				/* Nest events have a further mux */
				s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK;
				if ((ttmset & 0x10) &&
				    MMCR1_NESTSEL(mmcr1) != s)
					return -1;
				ttmset |= 0x10;
				mmcr1 |= (u64)s << MMCR1_NESTSEL_SH;
			}
			if (0x30 <= psel && psel <= 0x3d) {
				/* these need the PMCx_ADDR_SEL bits */
				if (b >= 2)
					mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc;
			}
			/* bus select values are different for PMC3/4 */
			if (pmc >= 2 && (psel & 0x90) == 0x80)
				psel ^= 0x20;
		}
		if (ev & PM_LLA) {
			/* load lookahead match enable, and optionally its value bit */
			mmcr1 |= MMCR1_PMC1_LLA >> pmc;
			if (ev & PM_LLAV)
				mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc;
		}
		if (power6_marked_instr_event(event[i]))
			mmcra |= MMCRA_SAMPLE_ENABLE;
		/* only PMC1-4 (0-based 0..3) have a PMCxSEL field */
		if (pmc < 4)
			mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc);
	}
	mmcr[0] = 0;
	if (pmc_inuse & 1)
		mmcr[0] = MMCR0_PMC1CE;
	/* 0xe covers PMC2-4 only; PMC5/6 do not contribute here */
	if (pmc_inuse & 0xe)
		mmcr[0] |= MMCR0_PMCjCE;
	mmcr[1] = mmcr1;
	mmcr[2] = mmcra;
	return 0;
}
257 | |||
258 | /* | ||
259 | * Layout of constraint bits: | ||
260 | * | ||
261 | * 0-1 add field: number of uses of PMC1 (max 1) | ||
262 | * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 | ||
263 | * 12-15 add field: number of uses of PMC1-4 (max 4) | ||
264 | * 16-19 select field: unit on byte 0 of event bus | ||
265 | * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 | ||
266 | * 32-34 select field: nest (subunit) event selector | ||
267 | */ | ||
268 | static int p6_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
269 | { | ||
270 | int pmc, byte, sh, subunit; | ||
271 | u64 mask = 0, value = 0; | ||
272 | |||
273 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
274 | if (pmc) { | ||
275 | if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) | ||
276 | return -1; | ||
277 | sh = (pmc - 1) * 2; | ||
278 | mask |= 2 << sh; | ||
279 | value |= 1 << sh; | ||
280 | } | ||
281 | if (event & PM_BUSEVENT_MSK) { | ||
282 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
283 | sh = byte * 4 + (16 - PM_UNIT_SH); | ||
284 | mask |= PM_UNIT_MSKS << sh; | ||
285 | value |= (u64)(event & PM_UNIT_MSKS) << sh; | ||
286 | if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { | ||
287 | subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; | ||
288 | mask |= (u64)PM_SUBUNIT_MSK << 32; | ||
289 | value |= (u64)subunit << 32; | ||
290 | } | ||
291 | } | ||
292 | if (pmc <= 4) { | ||
293 | mask |= 0x8000; /* add field for count of PMC1-4 uses */ | ||
294 | value |= 0x1000; | ||
295 | } | ||
296 | *maskp = mask; | ||
297 | *valp = value; | ||
298 | return 0; | ||
299 | } | ||
300 | |||
301 | static int p6_limited_pmc_event(u64 event) | ||
302 | { | ||
303 | int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
304 | |||
305 | return pmc == 5 || pmc == 6; | ||
306 | } | ||
307 | |||
#define MAX_ALT	4	/* at most 4 alternatives for any event */

/*
 * Each row lists event codes that are alternatives for one another;
 * unused trailing slots are 0.
 *
 * find_alternatives_list() gives up at the first row whose leading
 * code is greater than the event, and stops scanning a row at the
 * first entry greater than the event, so rows must be kept sorted by
 * their first entry and the entries within each row must ascend.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	{ 0x0130e8, 0x2000f6, 0x3000fc },	/* PM_PTEG_RELOAD_VALID */
	{ 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */
	{ 0x080088, 0x200054, 0x3000f0 },	/* PM_ST_MISS_L1 */
	{ 0x10000a, 0x2000f4, 0x600005 },	/* PM_RUN_CYC */
	{ 0x10000b, 0x2000f5 },			/* PM_RUN_COUNT */
	{ 0x10000e, 0x400010 },			/* PM_PURR */
	{ 0x100010, 0x4000f8 },			/* PM_FLUSH */
	{ 0x10001a, 0x200010 },			/* PM_MRK_INST_DISP */
	{ 0x100026, 0x3000f8 },			/* PM_TB_BIT_TRANS */
	{ 0x100054, 0x2000f0 },			/* PM_ST_FIN */
	{ 0x100056, 0x2000fc },			/* PM_L1_ICACHE_MISS */
	{ 0x1000f0, 0x40000a },			/* PM_INST_IMC_MATCH_CMPL */
	{ 0x1000f8, 0x200008 },			/* PM_GCT_EMPTY_CYC */
	{ 0x1000fc, 0x400006 },			/* PM_LSU_DERAT_MISS_CYC */
	{ 0x20000e, 0x400007 },			/* PM_LSU_DERAT_MISS */
	{ 0x200012, 0x300012 },			/* PM_INST_DISP */
	{ 0x2000f2, 0x3000f2 },			/* PM_INST_DISP */
	{ 0x2000f8, 0x300010 },			/* PM_EXT_INT */
	{ 0x2000fe, 0x300056 },			/* PM_DATA_FROM_L2MISS */
	{ 0x2d0030, 0x30001a },			/* PM_MRK_FPU_FIN */
	{ 0x30000a, 0x400018 },			/* PM_MRK_INST_FIN */
	{ 0x3000f6, 0x40000e },			/* PM_L1_DCACHE_RELOAD_VALID */
	{ 0x3000fe, 0x400056 },			/* PM_DATA_FROM_L3MISS */
};
335 | |||
336 | /* | ||
337 | * This could be made more efficient with a binary search on | ||
338 | * a presorted list, if necessary | ||
339 | */ | ||
340 | static int find_alternatives_list(u64 event) | ||
341 | { | ||
342 | int i, j; | ||
343 | unsigned int alt; | ||
344 | |||
345 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
346 | if (event < event_alternatives[i][0]) | ||
347 | return -1; | ||
348 | for (j = 0; j < MAX_ALT; ++j) { | ||
349 | alt = event_alternatives[i][j]; | ||
350 | if (!alt || event < alt) | ||
351 | break; | ||
352 | if (event == alt) | ||
353 | return i; | ||
354 | } | ||
355 | } | ||
356 | return -1; | ||
357 | } | ||
358 | |||
359 | static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
360 | { | ||
361 | int i, j, nlim; | ||
362 | unsigned int psel, pmc; | ||
363 | unsigned int nalt = 1; | ||
364 | u64 aevent; | ||
365 | |||
366 | alt[0] = event; | ||
367 | nlim = p6_limited_pmc_event(event); | ||
368 | |||
369 | /* check the alternatives table */ | ||
370 | i = find_alternatives_list(event); | ||
371 | if (i >= 0) { | ||
372 | /* copy out alternatives from list */ | ||
373 | for (j = 0; j < MAX_ALT; ++j) { | ||
374 | aevent = event_alternatives[i][j]; | ||
375 | if (!aevent) | ||
376 | break; | ||
377 | if (aevent != event) | ||
378 | alt[nalt++] = aevent; | ||
379 | nlim += p6_limited_pmc_event(aevent); | ||
380 | } | ||
381 | |||
382 | } else { | ||
383 | /* Check for alternative ways of computing sum events */ | ||
384 | /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ | ||
385 | psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ | ||
386 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
387 | if (pmc && (psel == 0x32 || psel == 0x34)) | ||
388 | alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | | ||
389 | ((5 - pmc) << PM_PMC_SH); | ||
390 | |||
391 | /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ | ||
392 | if (pmc && (psel == 0x38 || psel == 0x3a)) | ||
393 | alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | | ||
394 | ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); | ||
395 | } | ||
396 | |||
397 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
398 | /* | ||
399 | * We're only counting in RUN state, | ||
400 | * so PM_CYC is equivalent to PM_RUN_CYC, | ||
401 | * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. | ||
402 | * This doesn't include alternatives that don't provide | ||
403 | * any extra flexibility in assigning PMCs (e.g. | ||
404 | * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). | ||
405 | * Note that even with these additional alternatives | ||
406 | * we never end up with more than 4 alternatives for any event. | ||
407 | */ | ||
408 | j = nalt; | ||
409 | for (i = 0; i < nalt; ++i) { | ||
410 | switch (alt[i]) { | ||
411 | case 0x1e: /* PM_CYC */ | ||
412 | alt[j++] = 0x600005; /* PM_RUN_CYC */ | ||
413 | ++nlim; | ||
414 | break; | ||
415 | case 0x10000a: /* PM_RUN_CYC */ | ||
416 | alt[j++] = 0x1e; /* PM_CYC */ | ||
417 | break; | ||
418 | case 2: /* PM_INST_CMPL */ | ||
419 | alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ | ||
420 | ++nlim; | ||
421 | break; | ||
422 | case 0x500009: /* PM_RUN_INST_CMPL */ | ||
423 | alt[j++] = 2; /* PM_INST_CMPL */ | ||
424 | break; | ||
425 | case 0x10000e: /* PM_PURR */ | ||
426 | alt[j++] = 0x4000f4; /* PM_RUN_PURR */ | ||
427 | break; | ||
428 | case 0x4000f4: /* PM_RUN_PURR */ | ||
429 | alt[j++] = 0x10000e; /* PM_PURR */ | ||
430 | break; | ||
431 | } | ||
432 | } | ||
433 | nalt = j; | ||
434 | } | ||
435 | |||
436 | if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { | ||
437 | /* remove the limited PMC events */ | ||
438 | j = 0; | ||
439 | for (i = 0; i < nalt; ++i) { | ||
440 | if (!p6_limited_pmc_event(alt[i])) { | ||
441 | alt[j] = alt[i]; | ||
442 | ++j; | ||
443 | } | ||
444 | } | ||
445 | nalt = j; | ||
446 | } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { | ||
447 | /* remove all but the limited PMC events */ | ||
448 | j = 0; | ||
449 | for (i = 0; i < nalt; ++i) { | ||
450 | if (p6_limited_pmc_event(alt[i])) { | ||
451 | alt[j] = alt[i]; | ||
452 | ++j; | ||
453 | } | ||
454 | } | ||
455 | nalt = j; | ||
456 | } | ||
457 | |||
458 | return nalt; | ||
459 | } | ||
460 | |||
461 | static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
462 | { | ||
463 | /* Set PMCxSEL to 0 to disable PMCx */ | ||
464 | if (pmc <= 3) | ||
465 | mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); | ||
466 | } | ||
467 | |||
/*
 * POWER6 event codes for the generic hardware events, indexed by
 * PERF_COUNT_HW_* id.  Exported through power6_pmu.generic_events.
 */
static int power6_generic_events[] = {
	[PERF_COUNT_HW_CPU_CYCLES] = 0x1e,
	[PERF_COUNT_HW_INSTRUCTIONS] = 2,
	[PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030,	/* LD_REF_L1 */
	[PERF_COUNT_HW_CACHE_MISSES] = 0x30000c,	/* LD_MISS_L1 */
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0,	/* BR_PRED */
	[PERF_COUNT_HW_BRANCH_MISSES] = 0x400052,	/* BR_MPRED */
};
476 | |||
/* Shorthand for the PERF_COUNT_HW_CACHE_* enumerators used below */
#define C(x)	PERF_COUNT_HW_CACHE_##x

/*
 * Table of generalized cache-related events, indexed by
 * [cache][operation][result].
 * 0 means not supported, -1 means nonsensical, other values
 * are event codes.
 * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
 */
static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x80082,	0x80080		},
		[C(OP_WRITE)] = {	0x80086,	0x80088		},
		[C(OP_PREFETCH)] = {	0x810a4,	0		},
	},
	[C(L1I)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x100056 	},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	0x4008c,	0		},
	},
	[C(LL)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x150730,	0x250532	},
		[C(OP_WRITE)] = {	0x250432,	0x150432	},
		[C(OP_PREFETCH)] = {	0x810a6,	0		},
	},
	[C(DTLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x20000e	},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(ITLB)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0,		0x420ce		},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
	[C(BPU)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
		[C(OP_READ)] = {	0x430e6,	0x400052	},
		[C(OP_WRITE)] = {	-1,		-1		},
		[C(OP_PREFETCH)] = {	-1,		-1		},
	},
};
517 | |||
/*
 * Description of the POWER6 PMU: number of counters, the callbacks
 * defined in this file and the generic/cache event tables.
 * add_fields/test_adder cover bits 0-15 of the constraint word, i.e.
 * the add fields listed in the layout comment above p6_get_constraint().
 * NOTE(review): the exact meaning of PPMU_LIMITED_PMC5_6 and
 * PPMU_ALT_SIPR is defined by the code that consumes struct
 * power_pmu -- confirm there.
 */
struct power_pmu power6_pmu = {
	.n_counter = 6,
	.max_alternatives = MAX_ALT,
	.add_fields = 0x1555,
	.test_adder = 0x3000,
	.compute_mmcr = p6_compute_mmcr,
	.get_constraint = p6_get_constraint,
	.get_alternatives = p6_get_alternatives,
	.disable_pmc = p6_disable_pmc,
	.limited_pmc_event = p6_limited_pmc_event,
	.flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR,
	.n_generic = ARRAY_SIZE(power6_generic_events),
	.generic_events = power6_generic_events,
	.cache_events = &power6_cache_events,
};
diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c new file mode 100644 index 000000000000..b3f7d1216bae --- /dev/null +++ b/arch/powerpc/kernel/power7-pmu.c | |||
@@ -0,0 +1,357 @@ | |||
1 | /* | ||
2 | * Performance counter support for POWER7 processors. | ||
3 | * | ||
4 | * Copyright 2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Fields of a POWER7 event code (shift / mask pairs)
 */
#define PM_PMC_SH	16	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_PMC_MSKS	(PM_PMC_MSK << PM_PMC_SH)

#define PM_UNIT_SH	12	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf

#define PM_COMBINE_SH	11	/* Combined event bit */
#define PM_COMBINE_MSK	1
#define PM_COMBINE_MSKS	0x800

#define PM_L2SEL_SH	8	/* L2 event select */
#define PM_L2SEL_MSK	7

#define PM_PMCSEL_MSK	0xff

/*
 * Fields of MMCR1 on POWER7
 */
#define MMCR1_TTM0SEL_SH	60
#define MMCR1_TTM1SEL_SH	56
#define MMCR1_TTM2SEL_SH	52
#define MMCR1_TTM3SEL_SH	48
#define MMCR1_TTMSEL_MSK	0xf

#define MMCR1_L2SEL_SH		45
#define MMCR1_L2SEL_MSK		7

#define MMCR1_PMC1_COMBINE_SH	35
#define MMCR1_PMC2_COMBINE_SH	34
#define MMCR1_PMC3_COMBINE_SH	33
#define MMCR1_PMC4_COMBINE_SH	32

#define MMCR1_PMC1SEL_SH	24
#define MMCR1_PMC2SEL_SH	16
#define MMCR1_PMC3SEL_SH	8
#define MMCR1_PMC4SEL_SH	0
/* shift of the PMCxSEL field for 0-based PMC index n */
#define MMCR1_PMCSEL_SH(n)	(MMCR1_PMC1SEL_SH - (n) * 8)
#define MMCR1_PMCSEL_MSK	0xff
50 | |||
51 | /* | ||
52 | * Bits in MMCRA | ||
53 | */ | ||
54 | |||
55 | /* | ||
56 | * Layout of constraint bits: | ||
57 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
58 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
59 |  *                                                 [  ><><><><><><> | ||
60 |  *                                                  NC P6P5P4P3P2P1 | ||
61 | * | ||
62 | * NC - number of counters | ||
63 | * 15: NC error 0x8000 | ||
64 | * 12-14: number of events needing PMC1-4 0x7000 | ||
65 | * | ||
66 | * P6 | ||
67 | * 11: P6 error 0x800 | ||
68 | * 10-11: Count of events needing PMC6 | ||
69 | * | ||
70 | * P1..P5 | ||
71 | * 0-9: Count of events needing PMC1..PMC5 | ||
72 | */ | ||
73 | |||
74 | static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
75 | { | ||
76 | int pmc, sh; | ||
77 | u64 mask = 0, value = 0; | ||
78 | |||
79 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
80 | if (pmc) { | ||
81 | if (pmc > 6) | ||
82 | return -1; | ||
83 | sh = (pmc - 1) * 2; | ||
84 | mask |= 2 << sh; | ||
85 | value |= 1 << sh; | ||
86 | if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) | ||
87 | return -1; | ||
88 | } | ||
89 | if (pmc < 5) { | ||
90 | /* need a counter from PMC1-4 set */ | ||
91 | mask |= 0x8000; | ||
92 | value |= 0x1000; | ||
93 | } | ||
94 | *maskp = mask; | ||
95 | *valp = value; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
#define MAX_ALT	2	/* at most 2 alternatives for any event */

/*
 * Each row lists event codes that are interchangeable with one another.
 * find_alternative() stops scanning at the first row whose leading code
 * is greater than the event, so rows are kept in ascending order of
 * their first entry.
 */
static const unsigned int event_alternatives[][MAX_ALT] = {
	/* PM_INST_DISP */
	{ 0x200f2, 0x300f2 },
	/* PM_RUN_CYC */
	{ 0x200f4, 0x600f4 },
	/* PM_RUN_INST_CMPL */
	{ 0x400fa, 0x500fa },
};
106 | |||
107 | /* | ||
108 | * Scan the alternatives table for a match and return the | ||
109 | * index into the alternatives table if found, else -1. | ||
110 | */ | ||
111 | static int find_alternative(u64 event) | ||
112 | { | ||
113 | int i, j; | ||
114 | |||
115 | for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { | ||
116 | if (event < event_alternatives[i][0]) | ||
117 | break; | ||
118 | for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) | ||
119 | if (event == event_alternatives[i][j]) | ||
120 | return i; | ||
121 | } | ||
122 | return -1; | ||
123 | } | ||
124 | |||
125 | static s64 find_alternative_decode(u64 event) | ||
126 | { | ||
127 | int pmc, psel; | ||
128 | |||
129 | /* this only handles the 4x decode events */ | ||
130 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
131 | psel = event & PM_PMCSEL_MSK; | ||
132 | if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) | ||
133 | return event - (1 << PM_PMC_SH) + 8; | ||
134 | if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) | ||
135 | return event + (1 << PM_PMC_SH) - 8; | ||
136 | return -1; | ||
137 | } | ||
138 | |||
139 | static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
140 | { | ||
141 | int i, j, nalt = 1; | ||
142 | s64 ae; | ||
143 | |||
144 | alt[0] = event; | ||
145 | nalt = 1; | ||
146 | i = find_alternative(event); | ||
147 | if (i >= 0) { | ||
148 | for (j = 0; j < MAX_ALT; ++j) { | ||
149 | ae = event_alternatives[i][j]; | ||
150 | if (ae && ae != event) | ||
151 | alt[nalt++] = ae; | ||
152 | } | ||
153 | } else { | ||
154 | ae = find_alternative_decode(event); | ||
155 | if (ae > 0) | ||
156 | alt[nalt++] = ae; | ||
157 | } | ||
158 | |||
159 | if (flags & PPMU_ONLY_COUNT_RUN) { | ||
160 | /* | ||
161 | * We're only counting in RUN state, | ||
162 | * so PM_CYC is equivalent to PM_RUN_CYC | ||
163 | * and PM_INST_CMPL === PM_RUN_INST_CMPL. | ||
164 | * This doesn't include alternatives that don't provide | ||
165 | * any extra flexibility in assigning PMCs. | ||
166 | */ | ||
167 | j = nalt; | ||
168 | for (i = 0; i < nalt; ++i) { | ||
169 | switch (alt[i]) { | ||
170 | case 0x1e: /* PM_CYC */ | ||
171 | alt[j++] = 0x600f4; /* PM_RUN_CYC */ | ||
172 | break; | ||
173 | case 0x600f4: /* PM_RUN_CYC */ | ||
174 | alt[j++] = 0x1e; | ||
175 | break; | ||
176 | case 0x2: /* PM_PPC_CMPL */ | ||
177 | alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ | ||
178 | break; | ||
179 | case 0x500fa: /* PM_RUN_INST_CMPL */ | ||
180 | alt[j++] = 0x2; /* PM_PPC_CMPL */ | ||
181 | break; | ||
182 | } | ||
183 | } | ||
184 | nalt = j; | ||
185 | } | ||
186 | |||
187 | return nalt; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Returns 1 if event counts things relating to marked instructions | ||
192 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
193 | */ | ||
194 | static int power7_marked_instr_event(u64 event) | ||
195 | { | ||
196 | int pmc, psel; | ||
197 | int unit; | ||
198 | |||
199 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
200 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
201 | psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */ | ||
202 | if (pmc >= 5) | ||
203 | return 0; | ||
204 | |||
205 | switch (psel >> 4) { | ||
206 | case 2: | ||
207 | return pmc == 2 || pmc == 4; | ||
208 | case 3: | ||
209 | if (psel == 0x3c) | ||
210 | return pmc == 1; | ||
211 | if (psel == 0x3e) | ||
212 | return pmc != 2; | ||
213 | return 1; | ||
214 | case 4: | ||
215 | case 5: | ||
216 | return unit == 0xd; | ||
217 | case 6: | ||
218 | if (psel == 0x64) | ||
219 | return pmc >= 3; | ||
220 | case 8: | ||
221 | return unit == 0xd; | ||
222 | } | ||
223 | return 0; | ||
224 | } | ||
225 | |||
226 | static int power7_compute_mmcr(u64 event[], int n_ev, | ||
227 | unsigned int hwc[], u64 mmcr[]) | ||
228 | { | ||
229 | u64 mmcr1 = 0; | ||
230 | u64 mmcra = 0; | ||
231 | unsigned int pmc, unit, combine, l2sel, psel; | ||
232 | unsigned int pmc_inuse = 0; | ||
233 | int i; | ||
234 | |||
235 | /* First pass to count resource use */ | ||
236 | for (i = 0; i < n_ev; ++i) { | ||
237 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
238 | if (pmc) { | ||
239 | if (pmc > 6) | ||
240 | return -1; | ||
241 | if (pmc_inuse & (1 << (pmc - 1))) | ||
242 | return -1; | ||
243 | pmc_inuse |= 1 << (pmc - 1); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | /* Second pass: assign PMCs, set all MMCR1 fields */ | ||
248 | for (i = 0; i < n_ev; ++i) { | ||
249 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
250 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
251 | combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK; | ||
252 | l2sel = (event[i] >> PM_L2SEL_SH) & PM_L2SEL_MSK; | ||
253 | psel = event[i] & PM_PMCSEL_MSK; | ||
254 | if (!pmc) { | ||
255 | /* Bus event or any-PMC direct event */ | ||
256 | for (pmc = 0; pmc < 4; ++pmc) { | ||
257 | if (!(pmc_inuse & (1 << pmc))) | ||
258 | break; | ||
259 | } | ||
260 | if (pmc >= 4) | ||
261 | return -1; | ||
262 | pmc_inuse |= 1 << pmc; | ||
263 | } else { | ||
264 | /* Direct or decoded event */ | ||
265 | --pmc; | ||
266 | } | ||
267 | if (pmc <= 3) { | ||
268 | mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); | ||
269 | mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); | ||
270 | mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); | ||
271 | if (unit == 6) /* L2 events */ | ||
272 | mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; | ||
273 | } | ||
274 | if (power7_marked_instr_event(event[i])) | ||
275 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
276 | hwc[i] = pmc; | ||
277 | } | ||
278 | |||
279 | /* Return MMCRx values */ | ||
280 | mmcr[0] = 0; | ||
281 | if (pmc_inuse & 1) | ||
282 | mmcr[0] = MMCR0_PMC1CE; | ||
283 | if (pmc_inuse & 0x3e) | ||
284 | mmcr[0] |= MMCR0_PMCjCE; | ||
285 | mmcr[1] = mmcr1; | ||
286 | mmcr[2] = mmcra; | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
291 | { | ||
292 | if (pmc <= 3) | ||
293 | mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); | ||
294 | } | ||
295 | |||
296 | static int power7_generic_events[] = { | ||
297 | [PERF_COUNT_CPU_CYCLES] = 0x1e, | ||
298 | [PERF_COUNT_INSTRUCTIONS] = 2, | ||
299 | [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */ | ||
300 | [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ | ||
301 | [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ | ||
302 | [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ | ||
303 | }; | ||
304 | |||
305 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
306 | |||
307 | /* | ||
308 | * Table of generalized cache-related events. | ||
309 | * 0 means not supported, -1 means nonsensical, other values | ||
310 | * are event codes. | ||
311 | */ | ||
312 | static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
313 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
314 | [C(OP_READ)] = { 0x400f0, 0xc880 }, | ||
315 | [C(OP_WRITE)] = { 0, 0x300f0 }, | ||
316 | [C(OP_PREFETCH)] = { 0xd8b8, 0 }, | ||
317 | }, | ||
318 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
319 | [C(OP_READ)] = { 0, 0x200fc }, | ||
320 | [C(OP_WRITE)] = { -1, -1 }, | ||
321 | [C(OP_PREFETCH)] = { 0x408a, 0 }, | ||
322 | }, | ||
323 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
324 | [C(OP_READ)] = { 0x6080, 0x6084 }, | ||
325 | [C(OP_WRITE)] = { 0x6082, 0x6086 }, | ||
326 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
327 | }, | ||
328 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
329 | [C(OP_READ)] = { 0, 0x300fc }, | ||
330 | [C(OP_WRITE)] = { -1, -1 }, | ||
331 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
332 | }, | ||
333 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
334 | [C(OP_READ)] = { 0, 0x400fc }, | ||
335 | [C(OP_WRITE)] = { -1, -1 }, | ||
336 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
337 | }, | ||
338 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
339 | [C(OP_READ)] = { 0x10068, 0x400f6 }, | ||
340 | [C(OP_WRITE)] = { -1, -1 }, | ||
341 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
342 | }, | ||
343 | }; | ||
344 | |||
345 | struct power_pmu power7_pmu = { | ||
346 | .n_counter = 6, | ||
347 | .max_alternatives = MAX_ALT + 1, | ||
348 | .add_fields = 0x1555ull, | ||
349 | .test_adder = 0x3000ull, | ||
350 | .compute_mmcr = power7_compute_mmcr, | ||
351 | .get_constraint = power7_get_constraint, | ||
352 | .get_alternatives = power7_get_alternatives, | ||
353 | .disable_pmc = power7_disable_pmc, | ||
354 | .n_generic = ARRAY_SIZE(power7_generic_events), | ||
355 | .generic_events = power7_generic_events, | ||
356 | .cache_events = &power7_cache_events, | ||
357 | }; | ||
diff --git a/arch/powerpc/kernel/ppc970-pmu.c b/arch/powerpc/kernel/ppc970-pmu.c new file mode 100644 index 000000000000..ba0a357a89f4 --- /dev/null +++ b/arch/powerpc/kernel/ppc970-pmu.c | |||
@@ -0,0 +1,482 @@ | |||
1 | /* | ||
2 | * Performance counter support for PPC970-family processors. | ||
3 | * | ||
4 | * Copyright 2008-2009 Paul Mackerras, IBM Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | #include <linux/string.h> | ||
12 | #include <linux/perf_counter.h> | ||
13 | #include <asm/reg.h> | ||
14 | |||
/*
 * Fields of a PPC970 event code (shift / mask pairs)
 */
#define PM_PMC_SH	12	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf

#define PM_UNIT_SH	8	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf

#define PM_SPCSEL_SH	6
#define PM_SPCSEL_MSK	3

#define PM_BYTE_SH	4	/* Byte number of event bus to use */
#define PM_BYTE_MSK	3

#define PM_PMCSEL_MSK	0xf

/* Values in PM_UNIT field */
#define PM_NONE		0
#define PM_FPU		1
#define PM_VPU		2
#define PM_ISU		3
#define PM_IFU		4
#define PM_IDU		5
#define PM_STS		6
#define PM_LSU0		7
#define PM_LSU1U	8
#define PM_LSU1L	9
#define PM_LASTUNIT	9

/*
 * Fields of MMCR0 on PPC970
 */
#define MMCR0_PMC1SEL_SH	8
#define MMCR0_PMC2SEL_SH	1
#define MMCR_PMCSEL_MSK		0x1f

/*
 * Fields of MMCR1 on PPC970
 */
#define MMCR1_TTM0SEL_SH		62
#define MMCR1_TTM1SEL_SH		59
#define MMCR1_TTM3SEL_SH		53
#define MMCR1_TTMSEL_MSK		3

#define MMCR1_TD_CP_DBG0SEL_SH		50
#define MMCR1_TD_CP_DBG1SEL_SH		48
#define MMCR1_TD_CP_DBG2SEL_SH		46
#define MMCR1_TD_CP_DBG3SEL_SH		44

#define MMCR1_PMC1_ADDER_SEL_SH		39
#define MMCR1_PMC2_ADDER_SEL_SH		38
#define MMCR1_PMC6_ADDER_SEL_SH		37
#define MMCR1_PMC5_ADDER_SEL_SH		36
#define MMCR1_PMC8_ADDER_SEL_SH		35
#define MMCR1_PMC7_ADDER_SEL_SH		34
#define MMCR1_PMC3_ADDER_SEL_SH		33
#define MMCR1_PMC4_ADDER_SEL_SH		32

#define MMCR1_PMC3SEL_SH		27
#define MMCR1_PMC4SEL_SH		22
#define MMCR1_PMC5SEL_SH		17
#define MMCR1_PMC6SEL_SH		12
#define MMCR1_PMC7SEL_SH		7
#define MMCR1_PMC8SEL_SH		2
73 | |||
74 | static short mmcr1_adder_bits[8] = { | ||
75 | MMCR1_PMC1_ADDER_SEL_SH, | ||
76 | MMCR1_PMC2_ADDER_SEL_SH, | ||
77 | MMCR1_PMC3_ADDER_SEL_SH, | ||
78 | MMCR1_PMC4_ADDER_SEL_SH, | ||
79 | MMCR1_PMC5_ADDER_SEL_SH, | ||
80 | MMCR1_PMC6_ADDER_SEL_SH, | ||
81 | MMCR1_PMC7_ADDER_SEL_SH, | ||
82 | MMCR1_PMC8_ADDER_SEL_SH | ||
83 | }; | ||
84 | |||
85 | /* | ||
86 | * Bits in MMCRA | ||
87 | */ | ||
88 | |||
89 | /* | ||
90 | * Layout of constraint bits: | ||
91 | * 6666555555555544444444443333333333222222222211111111110000000000 | ||
92 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
93 |  *               <><><>[  >[  >[  ><  ><  ><  ><  ><><><><><><><><> | ||
94 |  *               SPT0T1 UC PS1 PS2  B0  B1  B2  B3 P1P2P3P4P5P6P7P8 | ||
95 | * | ||
96 | * SP - SPCSEL constraint | ||
97 | * 48-49: SPCSEL value 0x3_0000_0000_0000 | ||
98 | * | ||
99 | * T0 - TTM0 constraint | ||
100 | * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 | ||
101 | * | ||
102 | * T1 - TTM1 constraint | ||
103 | * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 | ||
104 | * | ||
105 | * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS | ||
106 | * 43: UC3 error 0x0800_0000_0000 | ||
107 | * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 | ||
108 | * 41: ISU events needed 0x0200_0000_0000 | ||
109 | * 40: IDU|STS events needed 0x0100_0000_0000 | ||
110 | * | ||
111 | * PS1 | ||
112 | * 39: PS1 error 0x0080_0000_0000 | ||
113 | * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 | ||
114 | * | ||
115 | * PS2 | ||
116 | * 35: PS2 error 0x0008_0000_0000 | ||
117 | * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 | ||
118 | * | ||
119 | * B0 | ||
120 | * 28-31: Byte 0 event source 0xf000_0000 | ||
121 | * Encoding as for the event code | ||
122 | * | ||
123 | * B1, B2, B3 | ||
124 | * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources | ||
125 | * | ||
126 | * P1 | ||
127 | * 15: P1 error 0x8000 | ||
128 | * 14-15: Count of events needing PMC1 | ||
129 | * | ||
130 | * P2..P8 | ||
131 | * 0-13: Count of events needing PMC2..PMC8 | ||
132 | */ | ||
133 | |||
/*
 * Per-PMC bitmap (index = PMC number - 1) of the PMCSEL values that
 * select a marked-instruction direct event: bit n set means PMCSEL n.
 */
static unsigned char direct_marked_event[8] = {
	[0] = 0x0c,	/* PMC1, sel 2,3: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */
	[1] = 0x28,	/* PMC2, sel 3,5: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */
	[2] = 0x28,	/* PMC3, sel 3,5: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */
	[3] = 0x30,	/* PMC4, sel 4,5: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */
	[4] = 0x30,	/* PMC5, sel 4,5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */
	[5] = 0x38,	/* PMC6, sel 3,4,5: PM_MRK_ST_STS, PM_MRK_FXU_FIN,
			   PM_MRK_GRP_ISSUED */
	[6] = 0x30,	/* PMC7, sel 4,5: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */
	[7] = 0x10,	/* PMC8, sel 4: PM_MRK_LSU_FIN */
};
145 | |||
146 | /* | ||
147 | * Returns 1 if event counts things relating to marked instructions | ||
148 | * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. | ||
149 | */ | ||
150 | static int p970_marked_instr_event(u64 event) | ||
151 | { | ||
152 | int pmc, psel, unit, byte, bit; | ||
153 | unsigned int mask; | ||
154 | |||
155 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
156 | psel = event & PM_PMCSEL_MSK; | ||
157 | if (pmc) { | ||
158 | if (direct_marked_event[pmc - 1] & (1 << psel)) | ||
159 | return 1; | ||
160 | if (psel == 0) /* add events */ | ||
161 | bit = (pmc <= 4)? pmc - 1: 8 - pmc; | ||
162 | else if (psel == 7 || psel == 13) /* decode events */ | ||
163 | bit = 4; | ||
164 | else | ||
165 | return 0; | ||
166 | } else | ||
167 | bit = psel; | ||
168 | |||
169 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
170 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
171 | mask = 0; | ||
172 | switch (unit) { | ||
173 | case PM_VPU: | ||
174 | mask = 0x4c; /* byte 0 bits 2,3,6 */ | ||
175 | case PM_LSU0: | ||
176 | /* byte 2 bits 0,2,3,4,6; all of byte 1 */ | ||
177 | mask = 0x085dff00; | ||
178 | case PM_LSU1L: | ||
179 | mask = 0x50 << 24; /* byte 3 bits 4,6 */ | ||
180 | break; | ||
181 | } | ||
182 | return (mask >> (byte * 8 + bit)) & 1; | ||
183 | } | ||
184 | |||
185 | /* Masks and values for using events from the various units */ | ||
186 | static u64 unit_cons[PM_LASTUNIT+1][2] = { | ||
187 | [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, | ||
188 | [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, | ||
189 | [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, | ||
190 | [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, | ||
191 | [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, | ||
192 | [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, | ||
193 | }; | ||
194 | |||
195 | static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) | ||
196 | { | ||
197 | int pmc, byte, unit, sh, spcsel; | ||
198 | u64 mask = 0, value = 0; | ||
199 | int grp = -1; | ||
200 | |||
201 | pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; | ||
202 | if (pmc) { | ||
203 | if (pmc > 8) | ||
204 | return -1; | ||
205 | sh = (pmc - 1) * 2; | ||
206 | mask |= 2 << sh; | ||
207 | value |= 1 << sh; | ||
208 | grp = ((pmc - 1) >> 1) & 1; | ||
209 | } | ||
210 | unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
211 | if (unit) { | ||
212 | if (unit > PM_LASTUNIT) | ||
213 | return -1; | ||
214 | mask |= unit_cons[unit][0]; | ||
215 | value |= unit_cons[unit][1]; | ||
216 | byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
217 | /* | ||
218 | * Bus events on bytes 0 and 2 can be counted | ||
219 | * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. | ||
220 | */ | ||
221 | if (!pmc) | ||
222 | grp = byte & 1; | ||
223 | /* Set byte lane select field */ | ||
224 | mask |= 0xfULL << (28 - 4 * byte); | ||
225 | value |= (u64)unit << (28 - 4 * byte); | ||
226 | } | ||
227 | if (grp == 0) { | ||
228 | /* increment PMC1/2/5/6 field */ | ||
229 | mask |= 0x8000000000ull; | ||
230 | value |= 0x1000000000ull; | ||
231 | } else if (grp == 1) { | ||
232 | /* increment PMC3/4/7/8 field */ | ||
233 | mask |= 0x800000000ull; | ||
234 | value |= 0x100000000ull; | ||
235 | } | ||
236 | spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
237 | if (spcsel) { | ||
238 | mask |= 3ull << 48; | ||
239 | value |= (u64)spcsel << 48; | ||
240 | } | ||
241 | *maskp = mask; | ||
242 | *valp = value; | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) | ||
247 | { | ||
248 | alt[0] = event; | ||
249 | |||
250 | /* 2 alternatives for LSU empty */ | ||
251 | if (event == 0x2002 || event == 0x3002) { | ||
252 | alt[1] = event ^ 0x1000; | ||
253 | return 2; | ||
254 | } | ||
255 | |||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static int p970_compute_mmcr(u64 event[], int n_ev, | ||
260 | unsigned int hwc[], u64 mmcr[]) | ||
261 | { | ||
262 | u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; | ||
263 | unsigned int pmc, unit, byte, psel; | ||
264 | unsigned int ttm, grp; | ||
265 | unsigned int pmc_inuse = 0; | ||
266 | unsigned int pmc_grp_use[2]; | ||
267 | unsigned char busbyte[4]; | ||
268 | unsigned char unituse[16]; | ||
269 | unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; | ||
270 | unsigned char ttmuse[2]; | ||
271 | unsigned char pmcsel[8]; | ||
272 | int i; | ||
273 | int spcsel; | ||
274 | |||
275 | if (n_ev > 8) | ||
276 | return -1; | ||
277 | |||
278 | /* First pass to count resource use */ | ||
279 | pmc_grp_use[0] = pmc_grp_use[1] = 0; | ||
280 | memset(busbyte, 0, sizeof(busbyte)); | ||
281 | memset(unituse, 0, sizeof(unituse)); | ||
282 | for (i = 0; i < n_ev; ++i) { | ||
283 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
284 | if (pmc) { | ||
285 | if (pmc_inuse & (1 << (pmc - 1))) | ||
286 | return -1; | ||
287 | pmc_inuse |= 1 << (pmc - 1); | ||
288 | /* count 1/2/5/6 vs 3/4/7/8 use */ | ||
289 | ++pmc_grp_use[((pmc - 1) >> 1) & 1]; | ||
290 | } | ||
291 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
292 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
293 | if (unit) { | ||
294 | if (unit > PM_LASTUNIT) | ||
295 | return -1; | ||
296 | if (!pmc) | ||
297 | ++pmc_grp_use[byte & 1]; | ||
298 | if (busbyte[byte] && busbyte[byte] != unit) | ||
299 | return -1; | ||
300 | busbyte[byte] = unit; | ||
301 | unituse[unit] = 1; | ||
302 | } | ||
303 | } | ||
304 | if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) | ||
305 | return -1; | ||
306 | |||
307 | /* | ||
308 | * Assign resources and set multiplexer selects. | ||
309 | * | ||
310 | * PM_ISU can go either on TTM0 or TTM1, but that's the only | ||
311 | * choice we have to deal with. | ||
312 | */ | ||
313 | if (unituse[PM_ISU] & | ||
314 | (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) | ||
315 | unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ | ||
316 | /* Set TTM[01]SEL fields. */ | ||
317 | ttmuse[0] = ttmuse[1] = 0; | ||
318 | for (i = PM_FPU; i <= PM_STS; ++i) { | ||
319 | if (!unituse[i]) | ||
320 | continue; | ||
321 | ttm = unitmap[i]; | ||
322 | ++ttmuse[(ttm >> 2) & 1]; | ||
323 | mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; | ||
324 | } | ||
325 | /* Check only one unit per TTMx */ | ||
326 | if (ttmuse[0] > 1 || ttmuse[1] > 1) | ||
327 | return -1; | ||
328 | |||
329 | /* Set byte lane select fields and TTM3SEL. */ | ||
330 | for (byte = 0; byte < 4; ++byte) { | ||
331 | unit = busbyte[byte]; | ||
332 | if (!unit) | ||
333 | continue; | ||
334 | if (unit <= PM_STS) | ||
335 | ttm = (unitmap[unit] >> 2) & 1; | ||
336 | else if (unit == PM_LSU0) | ||
337 | ttm = 2; | ||
338 | else { | ||
339 | ttm = 3; | ||
340 | if (unit == PM_LSU1L && byte >= 2) | ||
341 | mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); | ||
342 | } | ||
343 | mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); | ||
344 | } | ||
345 | |||
346 | /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ | ||
347 | memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ | ||
348 | for (i = 0; i < n_ev; ++i) { | ||
349 | pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; | ||
350 | unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; | ||
351 | byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; | ||
352 | psel = event[i] & PM_PMCSEL_MSK; | ||
353 | if (!pmc) { | ||
354 | /* Bus event or any-PMC direct event */ | ||
355 | if (unit) | ||
356 | psel |= 0x10 | ((byte & 2) << 2); | ||
357 | else | ||
358 | psel |= 8; | ||
359 | for (pmc = 0; pmc < 8; ++pmc) { | ||
360 | if (pmc_inuse & (1 << pmc)) | ||
361 | continue; | ||
362 | grp = (pmc >> 1) & 1; | ||
363 | if (unit) { | ||
364 | if (grp == (byte & 1)) | ||
365 | break; | ||
366 | } else if (pmc_grp_use[grp] < 4) { | ||
367 | ++pmc_grp_use[grp]; | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | pmc_inuse |= 1 << pmc; | ||
372 | } else { | ||
373 | /* Direct event */ | ||
374 | --pmc; | ||
375 | if (psel == 0 && (byte & 2)) | ||
376 | /* add events on higher-numbered bus */ | ||
377 | mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; | ||
378 | } | ||
379 | pmcsel[pmc] = psel; | ||
380 | hwc[i] = pmc; | ||
381 | spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; | ||
382 | mmcr1 |= spcsel; | ||
383 | if (p970_marked_instr_event(event[i])) | ||
384 | mmcra |= MMCRA_SAMPLE_ENABLE; | ||
385 | } | ||
386 | for (pmc = 0; pmc < 2; ++pmc) | ||
387 | mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); | ||
388 | for (; pmc < 8; ++pmc) | ||
389 | mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); | ||
390 | if (pmc_inuse & 1) | ||
391 | mmcr0 |= MMCR0_PMC1CE; | ||
392 | if (pmc_inuse & 0xfe) | ||
393 | mmcr0 |= MMCR0_PMCjCE; | ||
394 | |||
395 | mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ | ||
396 | |||
397 | /* Return MMCRx values */ | ||
398 | mmcr[0] = mmcr0; | ||
399 | mmcr[1] = mmcr1; | ||
400 | mmcr[2] = mmcra; | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) | ||
405 | { | ||
406 | int shift, i; | ||
407 | |||
408 | if (pmc <= 1) { | ||
409 | shift = MMCR0_PMC1SEL_SH - 7 * pmc; | ||
410 | i = 0; | ||
411 | } else { | ||
412 | shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); | ||
413 | i = 1; | ||
414 | } | ||
415 | /* | ||
416 | * Setting the PMCxSEL field to 0x08 disables PMC x. | ||
417 | */ | ||
418 | mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); | ||
419 | } | ||
420 | |||
421 | static int ppc970_generic_events[] = { | ||
422 | [PERF_COUNT_HW_CPU_CYCLES] = 7, | ||
423 | [PERF_COUNT_HW_INSTRUCTIONS] = 1, | ||
424 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ | ||
425 | [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ | ||
426 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ | ||
427 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ | ||
428 | }; | ||
429 | |||
430 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
431 | |||
432 | /* | ||
433 | * Table of generalized cache-related events. | ||
434 | * 0 means not supported, -1 means nonsensical, other values | ||
435 | * are event codes. | ||
436 | */ | ||
437 | static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { | ||
438 | [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
439 | [C(OP_READ)] = { 0x8810, 0x3810 }, | ||
440 | [C(OP_WRITE)] = { 0x7810, 0x813 }, | ||
441 | [C(OP_PREFETCH)] = { 0x731, 0 }, | ||
442 | }, | ||
443 | [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
444 | [C(OP_READ)] = { 0, 0 }, | ||
445 | [C(OP_WRITE)] = { -1, -1 }, | ||
446 | [C(OP_PREFETCH)] = { 0, 0 }, | ||
447 | }, | ||
448 | [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
449 | [C(OP_READ)] = { 0, 0 }, | ||
450 | [C(OP_WRITE)] = { 0, 0 }, | ||
451 | [C(OP_PREFETCH)] = { 0x733, 0 }, | ||
452 | }, | ||
453 | [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
454 | [C(OP_READ)] = { 0, 0x704 }, | ||
455 | [C(OP_WRITE)] = { -1, -1 }, | ||
456 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
457 | }, | ||
458 | [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
459 | [C(OP_READ)] = { 0, 0x700 }, | ||
460 | [C(OP_WRITE)] = { -1, -1 }, | ||
461 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
462 | }, | ||
463 | [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ | ||
464 | [C(OP_READ)] = { 0x431, 0x327 }, | ||
465 | [C(OP_WRITE)] = { -1, -1 }, | ||
466 | [C(OP_PREFETCH)] = { -1, -1 }, | ||
467 | }, | ||
468 | }; | ||
469 | |||
470 | struct power_pmu ppc970_pmu = { | ||
471 | .n_counter = 8, | ||
472 | .max_alternatives = 2, | ||
473 | .add_fields = 0x001100005555ull, | ||
474 | .test_adder = 0x013300000000ull, | ||
475 | .compute_mmcr = p970_compute_mmcr, | ||
476 | .get_constraint = p970_get_constraint, | ||
477 | .get_alternatives = p970_get_alternatives, | ||
478 | .disable_pmc = p970_disable_pmc, | ||
479 | .n_generic = ARRAY_SIZE(ppc970_generic_events), | ||
480 | .generic_events = ppc970_generic_events, | ||
481 | .cache_events = &ppc970_cache_events, | ||
482 | }; | ||
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 76993941cac9..5beffc8f481e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/module.h> | 29 | #include <linux/module.h> |
30 | #include <linux/kprobes.h> | 30 | #include <linux/kprobes.h> |
31 | #include <linux/kdebug.h> | 31 | #include <linux/kdebug.h> |
32 | #include <linux/perf_counter.h> | ||
32 | 33 | ||
33 | #include <asm/firmware.h> | 34 | #include <asm/firmware.h> |
34 | #include <asm/page.h> | 35 | #include <asm/page.h> |
@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, | |||
170 | die("Weird page fault", regs, SIGSEGV); | 171 | die("Weird page fault", regs, SIGSEGV); |
171 | } | 172 | } |
172 | 173 | ||
174 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | ||
175 | |||
173 | /* When running in the kernel we expect faults to occur only to | 176 | /* When running in the kernel we expect faults to occur only to |
174 | * addresses in user space. All other faults represent errors in the | 177 | * addresses in user space. All other faults represent errors in the |
175 | * kernel and should generate an OOPS. Unfortunately, in the case of an | 178 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
@@ -309,6 +312,8 @@ good_area: | |||
309 | } | 312 | } |
310 | if (ret & VM_FAULT_MAJOR) { | 313 | if (ret & VM_FAULT_MAJOR) { |
311 | current->maj_flt++; | 314 | current->maj_flt++; |
315 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, | ||
316 | regs, address); | ||
312 | #ifdef CONFIG_PPC_SMLPAR | 317 | #ifdef CONFIG_PPC_SMLPAR |
313 | if (firmware_has_feature(FW_FEATURE_CMO)) { | 318 | if (firmware_has_feature(FW_FEATURE_CMO)) { |
314 | preempt_disable(); | 319 | preempt_disable(); |
@@ -316,8 +321,11 @@ good_area: | |||
316 | preempt_enable(); | 321 | preempt_enable(); |
317 | } | 322 | } |
318 | #endif | 323 | #endif |
319 | } else | 324 | } else { |
320 | current->min_flt++; | 325 | current->min_flt++; |
326 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | ||
327 | regs, address); | ||
328 | } | ||
321 | up_read(&mm->mmap_sem); | 329 | up_read(&mm->mmap_sem); |
322 | return 0; | 330 | return 0; |
323 | 331 | ||
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9da795e49337..732ee93a8e98 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype | |||
@@ -1,6 +1,7 @@ | |||
1 | config PPC64 | 1 | config PPC64 |
2 | bool "64-bit kernel" | 2 | bool "64-bit kernel" |
3 | default n | 3 | default n |
4 | select HAVE_PERF_COUNTERS | ||
4 | help | 5 | help |
5 | This option selects whether a 32-bit or a 64-bit kernel | 6 | This option selects whether a 32-bit or a 64-bit kernel |
6 | will be built. | 7 | will be built. |
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index aafae3b140de..68f5578fe38e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -739,6 +739,7 @@ config X86_UP_IOAPIC | |||
739 | config X86_LOCAL_APIC | 739 | config X86_LOCAL_APIC |
740 | def_bool y | 740 | def_bool y |
741 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 741 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
742 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
742 | 743 | ||
743 | config X86_IO_APIC | 744 | config X86_IO_APIC |
744 | def_bool y | 745 | def_bool y |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index dcef387ddc36..e590261ba059 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -825,10 +825,11 @@ ia32_sys_call_table: | |||
825 | .quad compat_sys_signalfd4 | 825 | .quad compat_sys_signalfd4 |
826 | .quad sys_eventfd2 | 826 | .quad sys_eventfd2 |
827 | .quad sys_epoll_create1 | 827 | .quad sys_epoll_create1 |
828 | .quad sys_dup3 /* 330 */ | 828 | .quad sys_dup3 /* 330 */ |
829 | .quad sys_pipe2 | 829 | .quad sys_pipe2 |
830 | .quad sys_inotify_init1 | 830 | .quad sys_inotify_init1 |
831 | .quad compat_sys_preadv | 831 | .quad compat_sys_preadv |
832 | .quad compat_sys_pwritev | 832 | .quad compat_sys_pwritev |
833 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | 833 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ |
834 | .quad sys_perf_counter_open | ||
834 | ia32_syscall_end: | 835 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 85b46fba4229..aff9f1fcdcd7 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -247,5 +247,241 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
247 | #define smp_mb__before_atomic_inc() barrier() | 247 | #define smp_mb__before_atomic_inc() barrier() |
248 | #define smp_mb__after_atomic_inc() barrier() | 248 | #define smp_mb__after_atomic_inc() barrier() |
249 | 249 | ||
250 | /* A 64-bit atomic type */ | ||
251 | |||
252 | typedef struct { | ||
253 | unsigned long long counter; | ||
254 | } atomic64_t; | ||
255 | |||
256 | #define ATOMIC64_INIT(val) { (val) } | ||
257 | |||
258 | /** | ||
259 | * __atomic64_read - plain read of an atomic64 variable | ||
260 | * @ptr: pointer of type atomic64_t | ||
261 | * | ||
262 | * Plain member read of @ptr; see atomic64_read() for the atomic reader. | ||
263 | * Doesn't imply a read memory barrier. | ||
264 | */ | ||
265 | #define __atomic64_read(ptr) ((ptr)->counter) | ||
266 | |||
267 | static inline unsigned long long | ||
268 | cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) | ||
269 | { | ||
270 | asm volatile( | ||
271 | |||
272 | LOCK_PREFIX "cmpxchg8b (%[ptr])\n" | ||
273 | |||
274 | : "=A" (old) | ||
275 | |||
276 | : [ptr] "D" (ptr), | ||
277 | "A" (old), | ||
278 | "b" (ll_low(new)), | ||
279 | "c" (ll_high(new)) | ||
280 | |||
281 | : "memory"); | ||
282 | |||
283 | return old; | ||
284 | } | ||
285 | |||
286 | static inline unsigned long long | ||
287 | atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | ||
288 | unsigned long long new_val) | ||
289 | { | ||
290 | return cmpxchg8b(&ptr->counter, old_val, new_val); | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * atomic64_xchg - xchg atomic64 variable | ||
295 | * @ptr: pointer to type atomic64_t | ||
296 | * @new_val: value to assign | ||
297 | * | ||
298 | * Atomically xchgs the value of @ptr to @new_val and returns | ||
299 | * the old value that was there. Implemented as a cmpxchg retry | ||
300 | * loop seeded by a plain __atomic64_read() of the counter. | ||
301 | */ | ||
302 | |||
303 | static inline unsigned long long | ||
304 | atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | ||
305 | { | ||
306 | unsigned long long old_val; | ||
307 | |||
308 | do { | ||
309 | old_val = __atomic64_read(ptr); | ||
310 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
311 | |||
312 | return old_val; | ||
313 | } | ||
314 | |||
315 | /** | ||
316 | * atomic64_set - set atomic64 variable | ||
317 | * @ptr: pointer to type atomic64_t | ||
318 | * @new_val: value to assign | ||
319 | * | ||
320 | * Atomically sets the value of @ptr to @new_val. | ||
321 | */ | ||
322 | static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) | ||
323 | { | ||
324 | atomic64_xchg(ptr, new_val); | ||
325 | } | ||
326 | |||
327 | /** | ||
328 | * atomic64_read - read atomic64 variable | ||
329 | * @ptr: pointer to type atomic64_t | ||
330 | * | ||
331 | * Atomically reads the value of @ptr and returns it. | ||
332 | */ | ||
333 | static inline unsigned long long atomic64_read(atomic64_t *ptr) | ||
334 | { | ||
335 | unsigned long long curr_val; | ||
336 | |||
337 | do { | ||
338 | curr_val = __atomic64_read(ptr); | ||
339 | } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); | ||
340 | |||
341 | return curr_val; | ||
342 | } | ||
343 | |||
344 | /** | ||
345 | * atomic64_add_return - add and return | ||
346 | * @delta: integer value to add | ||
347 | * @ptr: pointer to type atomic64_t | ||
348 | * | ||
349 | * Atomically adds @delta to @ptr and returns the new value of @ptr. | ||
350 | */ | ||
351 | static inline unsigned long long | ||
352 | atomic64_add_return(unsigned long long delta, atomic64_t *ptr) | ||
353 | { | ||
354 | unsigned long long old_val, new_val; | ||
355 | |||
356 | do { | ||
357 | old_val = __atomic64_read(ptr); | ||
358 | new_val = old_val + delta; | ||
359 | |||
360 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
361 | |||
362 | return new_val; | ||
363 | } | ||
364 | |||
365 | static inline unsigned long long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) | ||
366 | { | ||
367 | return atomic64_add_return(-delta, ptr); /* full 64-bit result; 'long' would truncate it */ | ||
368 | } | ||
369 | |||
370 | static inline unsigned long long atomic64_inc_return(atomic64_t *ptr) | ||
371 | { | ||
372 | return atomic64_add_return(1, ptr); /* full 64-bit result; 'long' would truncate it */ | ||
373 | } | ||
374 | |||
375 | static inline unsigned long long atomic64_dec_return(atomic64_t *ptr) | ||
376 | { | ||
377 | return atomic64_add_return(-1ULL, ptr); /* full 64-bit result; 'long' would truncate it */ | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * atomic64_add - add integer to atomic64 variable | ||
382 | * @delta: integer value to add | ||
383 | * @ptr: pointer to type atomic64_t | ||
384 | * | ||
385 | * Atomically adds @delta to @ptr. | ||
386 | */ | ||
387 | static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) | ||
388 | { | ||
389 | atomic64_add_return(delta, ptr); | ||
390 | } | ||
391 | |||
392 | /** | ||
393 | * atomic64_sub - subtract the atomic64 variable | ||
394 | * @delta: integer value to subtract | ||
395 | * @ptr: pointer to type atomic64_t | ||
396 | * | ||
397 | * Atomically subtracts @delta from @ptr. | ||
398 | */ | ||
399 | static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) | ||
400 | { | ||
401 | atomic64_add(-delta, ptr); | ||
402 | } | ||
403 | |||
404 | /** | ||
405 | * atomic64_sub_and_test - subtract value from variable and test result | ||
406 | * @delta: integer value to subtract | ||
407 | * @ptr: pointer to type atomic64_t | ||
408 | * | ||
409 | * Atomically subtracts @delta from @ptr and returns true if the | ||
410 | * full 64-bit result is zero, or false for all other cases. The | ||
411 | * result is kept in a 64-bit local so no high bits are dropped. | ||
412 | */ | ||
413 | static inline int | ||
414 | atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | ||
415 | { | ||
416 | unsigned long long new_val = atomic64_add_return(-delta, ptr); | ||
417 | |||
418 | return new_val == 0; | ||
419 | } | ||
420 | |||
421 | /** | ||
422 | * atomic64_inc - increment atomic64 variable | ||
423 | * @ptr: pointer to type atomic64_t | ||
424 | * | ||
425 | * Atomically increments @ptr by 1. | ||
426 | */ | ||
427 | static inline void atomic64_inc(atomic64_t *ptr) | ||
428 | { | ||
429 | atomic64_add(1, ptr); | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * atomic64_dec - decrement atomic64 variable | ||
434 | * @ptr: pointer to type atomic64_t | ||
435 | * | ||
436 | * Atomically decrements @ptr by 1. | ||
437 | */ | ||
438 | static inline void atomic64_dec(atomic64_t *ptr) | ||
439 | { | ||
440 | atomic64_sub(1, ptr); | ||
441 | } | ||
442 | |||
443 | /** | ||
444 | * atomic64_dec_and_test - decrement and test | ||
445 | * @ptr: pointer to type atomic64_t | ||
446 | * | ||
447 | * Atomically decrements @ptr by 1 and | ||
448 | * returns true if the result is 0, or false for all other | ||
449 | * cases. | ||
450 | */ | ||
451 | static inline int atomic64_dec_and_test(atomic64_t *ptr) | ||
452 | { | ||
453 | return atomic64_sub_and_test(1, ptr); | ||
454 | } | ||
455 | |||
456 | /** | ||
457 | * atomic64_inc_and_test - increment and test | ||
458 | * @ptr: pointer to type atomic64_t | ||
459 | * | ||
460 | * Atomically increments @ptr by 1 | ||
461 | * and returns true if the result is zero, or false for all | ||
462 | * other cases. | ||
463 | */ | ||
464 | static inline int atomic64_inc_and_test(atomic64_t *ptr) | ||
465 | { | ||
466 | return atomic64_sub_and_test(-1, ptr); | ||
467 | } | ||
468 | |||
469 | /** | ||
470 | * atomic64_add_negative - add and test if negative | ||
471 | * @delta: integer value to add | ||
472 | * @ptr: pointer to type atomic64_t | ||
473 | * | ||
474 | * Atomically adds @delta to @ptr and returns true | ||
475 | * if the result is negative, or false when | ||
476 | * result is greater than or equal to zero. | ||
477 | */ | ||
478 | static inline int | ||
479 | atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) | ||
480 | { | ||
481 | long long old_val = atomic64_add_return(delta, ptr); | ||
482 | |||
483 | return old_val < 0; | ||
484 | } | ||
485 | |||
250 | #include <asm-generic/atomic.h> | 486 | #include <asm-generic/atomic.h> |
251 | #endif /* _ASM_X86_ATOMIC_32_H */ | 487 | #endif /* _ASM_X86_ATOMIC_32_H */ |
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..d750a10ccad6 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) | |||
49 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | 49 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) |
50 | 50 | ||
51 | #ifdef CONFIG_PERF_COUNTERS | 51 | #ifdef CONFIG_PERF_COUNTERS |
52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) | 52 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | #ifdef CONFIG_X86_MCE_P4THERMAL | 55 | #ifdef CONFIG_X86_MCE_P4THERMAL |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 37555e52f980..9ebc5c255032 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -13,6 +13,8 @@ typedef struct { | |||
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int generic_irqs; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | ||
17 | unsigned int apic_pending_irqs; | ||
16 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
17 | unsigned int irq_resched_count; | 19 | unsigned int irq_resched_count; |
18 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 3bd1777a4c8b..6df45f639666 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -29,6 +29,8 @@ | |||
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void generic_interrupt(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_pending_interrupt(void); | ||
33 | |||
32 | extern void spurious_interrupt(void); | 34 | extern void spurious_interrupt(void); |
33 | extern void thermal_interrupt(void); | 35 | extern void thermal_interrupt(void); |
34 | extern void reschedule_interrupt(void); | 36 | extern void reschedule_interrupt(void); |
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index fa0fd068bc2e..000000000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H | ||
2 | #define _ASM_X86_INTEL_ARCH_PERFMON_H | ||
3 | |||
4 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
5 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
6 | |||
7 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
8 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
9 | |||
10 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
11 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
12 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
13 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
14 | |||
15 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) | ||
16 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
17 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) | ||
18 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
19 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
20 | |||
21 | union cpuid10_eax { | ||
22 | struct { | ||
23 | unsigned int version_id:8; | ||
24 | unsigned int num_counters:8; | ||
25 | unsigned int bit_width:8; | ||
26 | unsigned int mask_length:8; | ||
27 | } split; | ||
28 | unsigned int full; | ||
29 | }; | ||
30 | |||
31 | #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ | ||
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 910b5a3d6751..e997be98c9b9 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -108,14 +108,14 @@ | |||
108 | #define LOCAL_TIMER_VECTOR 0xef | 108 | #define LOCAL_TIMER_VECTOR 0xef |
109 | 109 | ||
110 | /* | 110 | /* |
111 | * Performance monitoring interrupt vector: | 111 | * Generic system vector for platform specific use |
112 | */ | 112 | */ |
113 | #define LOCAL_PERF_VECTOR 0xee | 113 | #define GENERIC_INTERRUPT_VECTOR 0xed |
114 | 114 | ||
115 | /* | 115 | /* |
116 | * Generic system vector for platform specific use | 116 | * Performance monitoring pending work vector: |
117 | */ | 117 | */ |
118 | #define GENERIC_INTERRUPT_VECTOR 0xed | 118 | #define LOCAL_PENDING_VECTOR 0xec |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | 121 | * First APIC vector available to drivers: (vectors 0x30-0xee) we |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 000000000000..876ed97147b3 --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -0,0 +1,100 @@ | |||
1 | #ifndef _ASM_X86_PERF_COUNTER_H | ||
2 | #define _ASM_X86_PERF_COUNTER_H | ||
3 | |||
4 | /* | ||
5 | * Performance counter hw details: | ||
6 | */ | ||
7 | |||
8 | #define X86_PMC_MAX_GENERIC 8 | ||
9 | #define X86_PMC_MAX_FIXED 3 | ||
10 | |||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | ||
14 | |||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
16 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
17 | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
20 | |||
21 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
22 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
23 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
24 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
25 | |||
26 | /* | ||
27 | * Includes eventsel and unit mask as well: | ||
28 | */ | ||
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | ||
30 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | ||
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | ||
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
36 | |||
37 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | ||
38 | |||
39 | /* | ||
40 | * Intel "Architectural Performance Monitoring" CPUID | ||
41 | * detection/enumeration details: | ||
42 | */ | ||
43 | union cpuid10_eax { | ||
44 | struct { | ||
45 | unsigned int version_id:8; | ||
46 | unsigned int num_counters:8; | ||
47 | unsigned int bit_width:8; | ||
48 | unsigned int mask_length:8; | ||
49 | } split; | ||
50 | unsigned int full; | ||
51 | }; | ||
52 | |||
53 | union cpuid10_edx { | ||
54 | struct { | ||
55 | unsigned int num_counters_fixed:4; | ||
56 | unsigned int reserved:28; | ||
57 | } split; | ||
58 | unsigned int full; | ||
59 | }; | ||
60 | |||
61 | |||
62 | /* | ||
63 | * Fixed-purpose performance counters: | ||
64 | */ | ||
65 | |||
66 | /* | ||
67 | * All 3 fixed-mode PMCs are configured via this single MSR: | ||
68 | */ | ||
69 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d | ||
70 | |||
71 | /* | ||
72 | * The counts are available in three separate MSRs: | ||
73 | */ | ||
74 | |||
75 | /* Instr_Retired.Any: */ | ||
76 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | ||
77 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | ||
78 | |||
79 | /* CPU_CLK_Unhalted.Core: */ | ||
80 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | ||
81 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | ||
82 | |||
83 | /* CPU_CLK_Unhalted.Ref: */ | ||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | ||
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | ||
86 | |||
87 | extern void set_perf_counter_pending(void); | ||
88 | |||
89 | #define clear_perf_counter_pending() do { } while (0) | ||
90 | #define test_perf_counter_pending() (0) | ||
91 | |||
92 | #ifdef CONFIG_PERF_COUNTERS | ||
93 | extern void init_hw_perf_counters(void); | ||
94 | extern void perf_counters_lapic_init(void); | ||
95 | #else | ||
96 | static inline void init_hw_perf_counters(void) { } | ||
97 | static inline void perf_counters_lapic_init(void) { } | ||
98 | #endif | ||
99 | |||
100 | #endif /* _ASM_X86_PERF_COUNTER_H */ | ||
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 708dae61262d..732a30706153 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -341,6 +341,7 @@ | |||
341 | #define __NR_preadv 333 | 341 | #define __NR_preadv 333 |
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | 343 | #define __NR_rt_tgsigqueueinfo 335 |
344 | #define __NR_perf_counter_open 336 | ||
344 | 345 | ||
345 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
346 | 347 | ||
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 4e2b05404400..900e1617e672 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -659,7 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv) | |||
659 | __SYSCALL(__NR_pwritev, sys_pwritev) | 659 | __SYSCALL(__NR_pwritev, sys_pwritev) |
660 | #define __NR_rt_tgsigqueueinfo 297 | 660 | #define __NR_rt_tgsigqueueinfo 297 |
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | 661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) |
662 | 662 | #define __NR_perf_counter_open 298 | |
663 | __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | ||
663 | 664 | ||
664 | #ifndef __NO_STUBS | 665 | #ifndef __NO_STUBS |
665 | #define __ARCH_WANT_OLD_READDIR | 666 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a4c9cf0bf70b..076d3881f3da 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -14,6 +14,7 @@ | |||
14 | * Mikael Pettersson : PM converted to driver model. | 14 | * Mikael Pettersson : PM converted to driver model. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/perf_counter.h> | ||
17 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
18 | #include <linux/mc146818rtc.h> | 19 | #include <linux/mc146818rtc.h> |
19 | #include <linux/acpi_pmtmr.h> | 20 | #include <linux/acpi_pmtmr.h> |
@@ -34,6 +35,7 @@ | |||
34 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
35 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
36 | 37 | ||
38 | #include <asm/perf_counter.h> | ||
37 | #include <asm/pgalloc.h> | 39 | #include <asm/pgalloc.h> |
38 | #include <asm/atomic.h> | 40 | #include <asm/atomic.h> |
39 | #include <asm/mpspec.h> | 41 | #include <asm/mpspec.h> |
@@ -1187,6 +1189,7 @@ void __cpuinit setup_local_APIC(void) | |||
1187 | apic_write(APIC_ESR, 0); | 1189 | apic_write(APIC_ESR, 0); |
1188 | } | 1190 | } |
1189 | #endif | 1191 | #endif |
1192 | perf_counters_lapic_init(); | ||
1190 | 1193 | ||
1191 | preempt_disable(); | 1194 | preempt_disable(); |
1192 | 1195 | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1946fac42ab3..94605e7f6a54 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -177,16 +177,18 @@ int __init arch_early_irq_init(void) | |||
177 | struct irq_cfg *cfg; | 177 | struct irq_cfg *cfg; |
178 | struct irq_desc *desc; | 178 | struct irq_desc *desc; |
179 | int count; | 179 | int count; |
180 | int node; | ||
180 | int i; | 181 | int i; |
181 | 182 | ||
182 | cfg = irq_cfgx; | 183 | cfg = irq_cfgx; |
183 | count = ARRAY_SIZE(irq_cfgx); | 184 | count = ARRAY_SIZE(irq_cfgx); |
185 | node= cpu_to_node(boot_cpu_id); | ||
184 | 186 | ||
185 | for (i = 0; i < count; i++) { | 187 | for (i = 0; i < count; i++) { |
186 | desc = irq_to_desc(i); | 188 | desc = irq_to_desc(i); |
187 | desc->chip_data = &cfg[i]; | 189 | desc->chip_data = &cfg[i]; |
188 | alloc_bootmem_cpumask_var(&cfg[i].domain); | 190 | alloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
189 | alloc_bootmem_cpumask_var(&cfg[i].old_domain); | 191 | alloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
190 | if (i < NR_IRQS_LEGACY) | 192 | if (i < NR_IRQS_LEGACY) |
191 | cpumask_setall(cfg[i].domain); | 193 | cpumask_setall(cfg[i].domain); |
192 | } | 194 | } |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e4..3efcb2b96a15 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | # | 1 | # |
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details, features and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | |||
23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
25 | 25 | ||
26 | obj-$(CONFIG_X86_MCE) += mcheck/ | 26 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o |
27 | obj-$(CONFIG_MTRR) += mtrr/ | ||
28 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
29 | 27 | ||
30 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 28 | obj-$(CONFIG_X86_MCE) += mcheck/ |
29 | obj-$(CONFIG_MTRR) += mtrr/ | ||
30 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
31 | |||
32 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
31 | 33 | ||
32 | quiet_cmd_mkcapflags = MKCAP $@ | 34 | quiet_cmd_mkcapflags = MKCAP $@ |
33 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | 35 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b0517aa2bd3b..3ffdcfa9abdf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_counter.h> | ||
16 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
17 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -874,6 +875,7 @@ void __init identify_boot_cpu(void) | |||
874 | #else | 875 | #else |
875 | vgetcpu_set_mode(); | 876 | vgetcpu_set_mode(); |
876 | #endif | 877 | #endif |
878 | init_hw_perf_counters(); | ||
877 | } | 879 | } |
878 | 880 | ||
879 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 881 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c new file mode 100644 index 000000000000..895c82e78455 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -0,0 +1,1704 @@ | |||
1 | /* | ||
2 | * Performance counter x86 architecture code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | ||
10 | * For licencing details see kernel-base/COPYING | ||
11 | */ | ||
12 | |||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/capability.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/hardirq.h> | ||
17 | #include <linux/kprobes.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | |||
23 | #include <asm/apic.h> | ||
24 | #include <asm/stacktrace.h> | ||
25 | #include <asm/nmi.h> | ||
26 | |||
27 | static u64 perf_counter_mask __read_mostly; | ||
28 | |||
29 | struct cpu_hw_counters { | ||
30 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | ||
31 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
32 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
33 | unsigned long interrupts; | ||
34 | int enabled; | ||
35 | }; | ||
36 | |||
37 | /* | ||
38 | * struct x86_pmu - generic x86 pmu | ||
39 | */ | ||
40 | struct x86_pmu { | ||
41 | const char *name; | ||
42 | int version; | ||
43 | int (*handle_irq)(struct pt_regs *); | ||
44 | void (*disable_all)(void); | ||
45 | void (*enable_all)(void); | ||
46 | void (*enable)(struct hw_perf_counter *, int); | ||
47 | void (*disable)(struct hw_perf_counter *, int); | ||
48 | unsigned eventsel; | ||
49 | unsigned perfctr; | ||
50 | u64 (*event_map)(int); | ||
51 | u64 (*raw_event)(u64); | ||
52 | int max_events; | ||
53 | int num_counters; | ||
54 | int num_counters_fixed; | ||
55 | int counter_bits; | ||
56 | u64 counter_mask; | ||
57 | u64 max_period; | ||
58 | u64 intel_ctrl; | ||
59 | }; | ||
60 | |||
61 | static struct x86_pmu x86_pmu __read_mostly; | ||
62 | |||
63 | static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | ||
64 | .enabled = 1, | ||
65 | }; | ||
66 | |||
67 | /* | ||
68 | * Intel PerfMon v3. Used on Core2 and later. | ||
69 | */ | ||
70 | static const u64 intel_perfmon_event_map[] = | ||
71 | { | ||
72 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
73 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
74 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
75 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
76 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
77 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
78 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
79 | }; | ||
80 | |||
81 | static u64 intel_pmu_event_map(int event) | ||
82 | { | ||
83 | return intel_perfmon_event_map[event]; | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * Generalized hw caching related event table, filled | ||
88 | * in on a per model basis. A value of 0 means | ||
89 | * 'not supported', -1 means 'event makes no sense on | ||
90 | * this CPU', any other value means the raw event | ||
91 | * ID. | ||
92 | */ | ||
93 | |||
94 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
95 | |||
96 | static u64 __read_mostly hw_cache_event_ids | ||
97 | [PERF_COUNT_HW_CACHE_MAX] | ||
98 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
99 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
100 | |||
101 | static const u64 nehalem_hw_cache_event_ids | ||
102 | [PERF_COUNT_HW_CACHE_MAX] | ||
103 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
104 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
105 | { | ||
106 | [ C(L1D) ] = { | ||
107 | [ C(OP_READ) ] = { | ||
108 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
109 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
110 | }, | ||
111 | [ C(OP_WRITE) ] = { | ||
112 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
113 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
114 | }, | ||
115 | [ C(OP_PREFETCH) ] = { | ||
116 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
117 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
118 | }, | ||
119 | }, | ||
120 | [ C(L1I ) ] = { | ||
121 | [ C(OP_READ) ] = { | ||
122 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
123 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
124 | }, | ||
125 | [ C(OP_WRITE) ] = { | ||
126 | [ C(RESULT_ACCESS) ] = -1, | ||
127 | [ C(RESULT_MISS) ] = -1, | ||
128 | }, | ||
129 | [ C(OP_PREFETCH) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0x0, | ||
131 | [ C(RESULT_MISS) ] = 0x0, | ||
132 | }, | ||
133 | }, | ||
134 | [ C(LL ) ] = { | ||
135 | [ C(OP_READ) ] = { | ||
136 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
137 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
138 | }, | ||
139 | [ C(OP_WRITE) ] = { | ||
140 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
141 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
142 | }, | ||
143 | [ C(OP_PREFETCH) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
145 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
146 | }, | ||
147 | }, | ||
148 | [ C(DTLB) ] = { | ||
149 | [ C(OP_READ) ] = { | ||
150 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
151 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
152 | }, | ||
153 | [ C(OP_WRITE) ] = { | ||
154 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
155 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
156 | }, | ||
157 | [ C(OP_PREFETCH) ] = { | ||
158 | [ C(RESULT_ACCESS) ] = 0x0, | ||
159 | [ C(RESULT_MISS) ] = 0x0, | ||
160 | }, | ||
161 | }, | ||
162 | [ C(ITLB) ] = { | ||
163 | [ C(OP_READ) ] = { | ||
164 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
165 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
166 | }, | ||
167 | [ C(OP_WRITE) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = -1, | ||
169 | [ C(RESULT_MISS) ] = -1, | ||
170 | }, | ||
171 | [ C(OP_PREFETCH) ] = { | ||
172 | [ C(RESULT_ACCESS) ] = -1, | ||
173 | [ C(RESULT_MISS) ] = -1, | ||
174 | }, | ||
175 | }, | ||
176 | [ C(BPU ) ] = { | ||
177 | [ C(OP_READ) ] = { | ||
178 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
179 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
180 | }, | ||
181 | [ C(OP_WRITE) ] = { | ||
182 | [ C(RESULT_ACCESS) ] = -1, | ||
183 | [ C(RESULT_MISS) ] = -1, | ||
184 | }, | ||
185 | [ C(OP_PREFETCH) ] = { | ||
186 | [ C(RESULT_ACCESS) ] = -1, | ||
187 | [ C(RESULT_MISS) ] = -1, | ||
188 | }, | ||
189 | }, | ||
190 | }; | ||
191 | |||
/*
 * Raw event codes for the generic cache events, Core 2 variant (per the
 * table name), indexed by [cache type][operation][result].
 *
 * NOTE(review): set_ext_hw_attr() answers -ENOENT for a 0 entry and
 * -EINVAL for a -1 entry when it reads hw_cache_event_ids; presumably
 * this table is what gets copied there on Core 2 - confirm at the init
 * site.
 */
192 | static const u64 core2_hw_cache_event_ids | ||
193 | [PERF_COUNT_HW_CACHE_MAX] | ||
194 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
195 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
196 | { | ||
197 | [ C(L1D) ] = { | ||
198 | [ C(OP_READ) ] = { | ||
199 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
200 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
201 | }, | ||
202 | [ C(OP_WRITE) ] = { | ||
203 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
204 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
205 | }, | ||
206 | [ C(OP_PREFETCH) ] = { | ||
207 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
208 | [ C(RESULT_MISS) ] = 0, | ||
209 | }, | ||
210 | }, | ||
211 | [ C(L1I ) ] = { | ||
212 | [ C(OP_READ) ] = { | ||
213 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
214 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
215 | }, | ||
216 | [ C(OP_WRITE) ] = { | ||
217 | [ C(RESULT_ACCESS) ] = -1, | ||
218 | [ C(RESULT_MISS) ] = -1, | ||
219 | }, | ||
220 | [ C(OP_PREFETCH) ] = { | ||
221 | [ C(RESULT_ACCESS) ] = 0, | ||
222 | [ C(RESULT_MISS) ] = 0, | ||
223 | }, | ||
224 | }, | ||
225 | [ C(LL ) ] = { | ||
226 | [ C(OP_READ) ] = { | ||
227 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
228 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
229 | }, | ||
230 | [ C(OP_WRITE) ] = { | ||
231 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
232 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
233 | }, | ||
234 | [ C(OP_PREFETCH) ] = { | ||
235 | [ C(RESULT_ACCESS) ] = 0, | ||
236 | [ C(RESULT_MISS) ] = 0, | ||
237 | }, | ||
238 | }, | ||
239 | [ C(DTLB) ] = { | ||
240 | [ C(OP_READ) ] = { | ||
241 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
242 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
243 | }, | ||
244 | [ C(OP_WRITE) ] = { | ||
245 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
246 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
247 | }, | ||
248 | [ C(OP_PREFETCH) ] = { | ||
249 | [ C(RESULT_ACCESS) ] = 0, | ||
250 | [ C(RESULT_MISS) ] = 0, | ||
251 | }, | ||
252 | }, | ||
253 | [ C(ITLB) ] = { | ||
254 | [ C(OP_READ) ] = { | ||
255 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
256 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
257 | }, | ||
258 | [ C(OP_WRITE) ] = { | ||
259 | [ C(RESULT_ACCESS) ] = -1, | ||
260 | [ C(RESULT_MISS) ] = -1, | ||
261 | }, | ||
262 | [ C(OP_PREFETCH) ] = { | ||
263 | [ C(RESULT_ACCESS) ] = -1, | ||
264 | [ C(RESULT_MISS) ] = -1, | ||
265 | }, | ||
266 | }, | ||
267 | [ C(BPU ) ] = { | ||
268 | [ C(OP_READ) ] = { | ||
269 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
270 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */ | ||
271 | }, | ||
272 | [ C(OP_WRITE) ] = { | ||
273 | [ C(RESULT_ACCESS) ] = -1, | ||
274 | [ C(RESULT_MISS) ] = -1, | ||
275 | }, | ||
276 | [ C(OP_PREFETCH) ] = { | ||
277 | [ C(RESULT_ACCESS) ] = -1, | ||
278 | [ C(RESULT_MISS) ] = -1, | ||
279 | }, | ||
280 | }, | ||
281 | }; | ||
282 | |||
/*
 * Raw event codes for the generic cache events, Atom variant (per the
 * table name), indexed by [cache type][operation][result].
 *
 * NOTE(review): set_ext_hw_attr() answers -ENOENT for a 0 entry and
 * -EINVAL for a -1 entry when it reads hw_cache_event_ids; presumably
 * this table is what gets copied there on Atom - confirm at the init
 * site.
 */
283 | static const u64 atom_hw_cache_event_ids | ||
284 | [PERF_COUNT_HW_CACHE_MAX] | ||
285 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
286 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
287 | { | ||
288 | [ C(L1D) ] = { | ||
289 | [ C(OP_READ) ] = { | ||
290 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
291 | [ C(RESULT_MISS) ] = 0, | ||
292 | }, | ||
293 | [ C(OP_WRITE) ] = { | ||
294 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
295 | [ C(RESULT_MISS) ] = 0, | ||
296 | }, | ||
297 | [ C(OP_PREFETCH) ] = { | ||
298 | [ C(RESULT_ACCESS) ] = 0x0, | ||
299 | [ C(RESULT_MISS) ] = 0, | ||
300 | }, | ||
301 | }, | ||
302 | [ C(L1I ) ] = { | ||
303 | [ C(OP_READ) ] = { | ||
304 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
305 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
306 | }, | ||
307 | [ C(OP_WRITE) ] = { | ||
308 | [ C(RESULT_ACCESS) ] = -1, | ||
309 | [ C(RESULT_MISS) ] = -1, | ||
310 | }, | ||
311 | [ C(OP_PREFETCH) ] = { | ||
312 | [ C(RESULT_ACCESS) ] = 0, | ||
313 | [ C(RESULT_MISS) ] = 0, | ||
314 | }, | ||
315 | }, | ||
316 | [ C(LL ) ] = { | ||
317 | [ C(OP_READ) ] = { | ||
318 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
319 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
320 | }, | ||
321 | [ C(OP_WRITE) ] = { | ||
322 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
323 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
324 | }, | ||
325 | [ C(OP_PREFETCH) ] = { | ||
326 | [ C(RESULT_ACCESS) ] = 0, | ||
327 | [ C(RESULT_MISS) ] = 0, | ||
328 | }, | ||
329 | }, | ||
330 | [ C(DTLB) ] = { | ||
331 | [ C(OP_READ) ] = { | ||
332 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
333 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
334 | }, | ||
335 | [ C(OP_WRITE) ] = { | ||
336 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
337 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
338 | }, | ||
339 | [ C(OP_PREFETCH) ] = { | ||
340 | [ C(RESULT_ACCESS) ] = 0, | ||
341 | [ C(RESULT_MISS) ] = 0, | ||
342 | }, | ||
343 | }, | ||
344 | [ C(ITLB) ] = { | ||
345 | [ C(OP_READ) ] = { | ||
346 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
347 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
348 | }, | ||
349 | [ C(OP_WRITE) ] = { | ||
350 | [ C(RESULT_ACCESS) ] = -1, | ||
351 | [ C(RESULT_MISS) ] = -1, | ||
352 | }, | ||
353 | [ C(OP_PREFETCH) ] = { | ||
354 | [ C(RESULT_ACCESS) ] = -1, | ||
355 | [ C(RESULT_MISS) ] = -1, | ||
356 | }, | ||
357 | }, | ||
358 | [ C(BPU ) ] = { | ||
359 | [ C(OP_READ) ] = { | ||
360 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
361 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */ | ||
362 | }, | ||
363 | [ C(OP_WRITE) ] = { | ||
364 | [ C(RESULT_ACCESS) ] = -1, | ||
365 | [ C(RESULT_MISS) ] = -1, | ||
366 | }, | ||
367 | [ C(OP_PREFETCH) ] = { | ||
368 | [ C(RESULT_ACCESS) ] = -1, | ||
369 | [ C(RESULT_MISS) ] = -1, | ||
370 | }, | ||
371 | }, | ||
372 | }; | ||
373 | |||
374 | static u64 intel_pmu_raw_event(u64 event) | ||
375 | { | ||
376 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
377 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
378 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
379 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
380 | #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
381 | |||
382 | #define CORE_EVNTSEL_MASK \ | ||
383 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
384 | CORE_EVNTSEL_UNIT_MASK | \ | ||
385 | CORE_EVNTSEL_EDGE_MASK | \ | ||
386 | CORE_EVNTSEL_INV_MASK | \ | ||
387 | CORE_EVNTSEL_COUNTER_MASK) | ||
388 | |||
389 | return event & CORE_EVNTSEL_MASK; | ||
390 | } | ||
391 | |||
/*
 * Raw event codes for the generic cache events, AMD variant (the "0f"
 * in the name presumably refers to CPU family 0x0f - confirm), indexed
 * by [cache type][operation][result]. Most entries are still 0.
 *
 * NOTE(review): set_ext_hw_attr() answers -ENOENT for a 0 entry and
 * -EINVAL for a -1 entry when it reads hw_cache_event_ids; presumably
 * this table is what gets copied there on AMD - confirm at the init
 * site.
 */
392 | static const u64 amd_0f_hw_cache_event_ids | ||
393 | [PERF_COUNT_HW_CACHE_MAX] | ||
394 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
395 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
396 | { | ||
397 | [ C(L1D) ] = { | ||
398 | [ C(OP_READ) ] = { | ||
399 | [ C(RESULT_ACCESS) ] = 0, | ||
400 | [ C(RESULT_MISS) ] = 0, | ||
401 | }, | ||
402 | [ C(OP_WRITE) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = 0, | ||
404 | [ C(RESULT_MISS) ] = 0, | ||
405 | }, | ||
406 | [ C(OP_PREFETCH) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = 0, | ||
408 | [ C(RESULT_MISS) ] = 0, | ||
409 | }, | ||
410 | }, | ||
411 | [ C(L1I ) ] = { | ||
412 | [ C(OP_READ) ] = { | ||
413 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
414 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
415 | }, | ||
416 | [ C(OP_WRITE) ] = { | ||
417 | [ C(RESULT_ACCESS) ] = -1, | ||
418 | [ C(RESULT_MISS) ] = -1, | ||
419 | }, | ||
420 | [ C(OP_PREFETCH) ] = { | ||
421 | [ C(RESULT_ACCESS) ] = 0, | ||
422 | [ C(RESULT_MISS) ] = 0, | ||
423 | }, | ||
424 | }, | ||
425 | [ C(LL ) ] = { | ||
426 | [ C(OP_READ) ] = { | ||
427 | [ C(RESULT_ACCESS) ] = 0, | ||
428 | [ C(RESULT_MISS) ] = 0, | ||
429 | }, | ||
430 | [ C(OP_WRITE) ] = { | ||
431 | [ C(RESULT_ACCESS) ] = 0, | ||
432 | [ C(RESULT_MISS) ] = 0, | ||
433 | }, | ||
434 | [ C(OP_PREFETCH) ] = { | ||
435 | [ C(RESULT_ACCESS) ] = 0, | ||
436 | [ C(RESULT_MISS) ] = 0, | ||
437 | }, | ||
438 | }, | ||
439 | [ C(DTLB) ] = { | ||
440 | [ C(OP_READ) ] = { | ||
441 | [ C(RESULT_ACCESS) ] = 0, | ||
442 | [ C(RESULT_MISS) ] = 0, | ||
443 | }, | ||
444 | [ C(OP_WRITE) ] = { | ||
445 | [ C(RESULT_ACCESS) ] = 0, | ||
446 | [ C(RESULT_MISS) ] = 0, | ||
447 | }, | ||
448 | [ C(OP_PREFETCH) ] = { | ||
449 | [ C(RESULT_ACCESS) ] = 0, | ||
450 | [ C(RESULT_MISS) ] = 0, | ||
451 | }, | ||
452 | }, | ||
453 | [ C(ITLB) ] = { | ||
454 | [ C(OP_READ) ] = { | ||
455 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
456 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
457 | }, | ||
458 | [ C(OP_WRITE) ] = { | ||
459 | [ C(RESULT_ACCESS) ] = -1, | ||
460 | [ C(RESULT_MISS) ] = -1, | ||
461 | }, | ||
462 | [ C(OP_PREFETCH) ] = { | ||
463 | [ C(RESULT_ACCESS) ] = -1, | ||
464 | [ C(RESULT_MISS) ] = -1, | ||
465 | }, | ||
466 | }, | ||
467 | [ C(BPU ) ] = { | ||
468 | [ C(OP_READ) ] = { | ||
469 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
470 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
471 | }, | ||
472 | [ C(OP_WRITE) ] = { | ||
473 | [ C(RESULT_ACCESS) ] = -1, | ||
474 | [ C(RESULT_MISS) ] = -1, | ||
475 | }, | ||
476 | [ C(OP_PREFETCH) ] = { | ||
477 | [ C(RESULT_ACCESS) ] = -1, | ||
478 | [ C(RESULT_MISS) ] = -1, | ||
479 | }, | ||
480 | }, | ||
481 | }; | ||
482 | |||
483 | /* | ||
484 | * AMD Performance Monitor K7 and later. | ||
 *
 * Maps the generic PERF_COUNT_HW_* event index to the AMD event code;
 * read through amd_pmu_event_map(). Only the six events listed below
 * have an entry, so the array has no slot for any higher index.
485 | */ | ||
486 | static const u64 amd_perfmon_event_map[] = | ||
487 | { | ||
488 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
489 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
490 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
491 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
492 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
493 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
494 | }; | ||
495 | |||
496 | static u64 amd_pmu_event_map(int event) | ||
497 | { | ||
498 | return amd_perfmon_event_map[event]; | ||
499 | } | ||
500 | |||
501 | static u64 amd_pmu_raw_event(u64 event) | ||
502 | { | ||
503 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
504 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
505 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
506 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
507 | #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL | ||
508 | |||
509 | #define K7_EVNTSEL_MASK \ | ||
510 | (K7_EVNTSEL_EVENT_MASK | \ | ||
511 | K7_EVNTSEL_UNIT_MASK | \ | ||
512 | K7_EVNTSEL_EDGE_MASK | \ | ||
513 | K7_EVNTSEL_INV_MASK | \ | ||
514 | K7_EVNTSEL_COUNTER_MASK) | ||
515 | |||
516 | return event & K7_EVNTSEL_MASK; | ||
517 | } | ||
518 | |||
519 | /* | ||
520 | * Propagate counter elapsed time into the generic counter. | ||
521 | * Can only be executed on the CPU where the counter is active. | ||
522 | * Returns the new raw hardware count that was read (the delta itself | ||
 * is only folded into counter->count and hwc->period_left).
523 | */ | ||
524 | static u64 | ||
525 | x86_perf_counter_update(struct perf_counter *counter, | ||
526 | struct hw_perf_counter *hwc, int idx) | ||
527 | { | ||
/* number of unused high bits in a 64-bit word holding a raw count */
528 | int shift = 64 - x86_pmu.counter_bits; | ||
529 | u64 prev_raw_count, new_raw_count; | ||
530 | s64 delta; | ||
531 | |||
532 | /* | ||
533 | * Careful: an NMI might modify the previous counter value. | ||
534 | * | ||
535 | * Our tactic to handle this is to first atomically read and | ||
536 | * exchange a new raw count - then add that new-prev delta | ||
537 | * count to the generic counter atomically: | ||
538 | */ | ||
539 | again: | ||
540 | prev_raw_count = atomic64_read(&hwc->prev_count); | ||
541 | rdmsrl(hwc->counter_base + idx, new_raw_count); | ||
542 | |||
/* retry if prev_count changed between the read above and the exchange */
543 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
544 | new_raw_count) != prev_raw_count) | ||
545 | goto again; | ||
546 | |||
547 | /* | ||
548 | * Now we have the new raw value and have updated the prev | ||
549 | * timestamp already. We can now calculate the elapsed delta | ||
550 | * (counter-)time and add that to the generic counter. | ||
551 | * | ||
552 | * Careful, not all hw sign-extends above the physical width | ||
553 | * of the count. | ||
 *
 * Shifting both values up by 'shift' discards the bits above the
 * counter width; shifting the signed difference back down then
 * yields the delta within counter_bits.
554 | */ | ||
555 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
556 | delta >>= shift; | ||
557 | |||
558 | atomic64_add(delta, &counter->count); | ||
559 | atomic64_sub(delta, &hwc->period_left); | ||
560 | |||
561 | return new_raw_count; | ||
562 | } | ||
563 | |||
/*
 * Reference count on the PMC reservation: bumped in
 * __hw_perf_counter_init(), dropped in hw_perf_counter_destroy().
 */
564 | static atomic_t active_counters; | ||
/* Held around reserve_pmc_hardware() / release_pmc_hardware(). */
565 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
566 | |||
567 | static bool reserve_pmc_hardware(void) | ||
568 | { | ||
569 | int i; | ||
570 | |||
571 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
572 | disable_lapic_nmi_watchdog(); | ||
573 | |||
574 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
575 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | ||
576 | goto perfctr_fail; | ||
577 | } | ||
578 | |||
579 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
580 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | ||
581 | goto eventsel_fail; | ||
582 | } | ||
583 | |||
584 | return true; | ||
585 | |||
586 | eventsel_fail: | ||
587 | for (i--; i >= 0; i--) | ||
588 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
589 | |||
590 | i = x86_pmu.num_counters; | ||
591 | |||
592 | perfctr_fail: | ||
593 | for (i--; i >= 0; i--) | ||
594 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
595 | |||
596 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
597 | enable_lapic_nmi_watchdog(); | ||
598 | |||
599 | return false; | ||
600 | } | ||
601 | |||
602 | static void release_pmc_hardware(void) | ||
603 | { | ||
604 | int i; | ||
605 | |||
606 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
607 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
608 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
609 | } | ||
610 | |||
611 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
612 | enable_lapic_nmi_watchdog(); | ||
613 | } | ||
614 | |||
615 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
616 | { | ||
617 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | ||
618 | release_pmc_hardware(); | ||
619 | mutex_unlock(&pmc_reserve_mutex); | ||
620 | } | ||
621 | } | ||
622 | |||
623 | static inline int x86_pmu_initialized(void) | ||
624 | { | ||
625 | return x86_pmu.handle_irq != NULL; | ||
626 | } | ||
627 | |||
628 | static inline int | ||
629 | set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | ||
630 | { | ||
631 | unsigned int cache_type, cache_op, cache_result; | ||
632 | u64 config, val; | ||
633 | |||
634 | config = attr->config; | ||
635 | |||
636 | cache_type = (config >> 0) & 0xff; | ||
637 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
638 | return -EINVAL; | ||
639 | |||
640 | cache_op = (config >> 8) & 0xff; | ||
641 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
642 | return -EINVAL; | ||
643 | |||
644 | cache_result = (config >> 16) & 0xff; | ||
645 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
646 | return -EINVAL; | ||
647 | |||
648 | val = hw_cache_event_ids[cache_type][cache_op][cache_result]; | ||
649 | |||
650 | if (val == 0) | ||
651 | return -ENOENT; | ||
652 | |||
653 | if (val == -1) | ||
654 | return -EINVAL; | ||
655 | |||
656 | hwc->config |= val; | ||
657 | |||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Setup the hardware configuration for a given attr_type | ||
663 | */ | ||
664 | static int __hw_perf_counter_init(struct perf_counter *counter) | ||
665 | { | ||
666 | struct perf_counter_attr *attr = &counter->attr; | ||
667 | struct hw_perf_counter *hwc = &counter->hw; | ||
668 | int err; | ||
669 | |||
670 | if (!x86_pmu_initialized()) | ||
671 | return -ENODEV; | ||
672 | |||
673 | err = 0; | ||
674 | if (!atomic_inc_not_zero(&active_counters)) { | ||
675 | mutex_lock(&pmc_reserve_mutex); | ||
676 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | ||
677 | err = -EBUSY; | ||
678 | else | ||
679 | atomic_inc(&active_counters); | ||
680 | mutex_unlock(&pmc_reserve_mutex); | ||
681 | } | ||
682 | if (err) | ||
683 | return err; | ||
684 | |||
685 | /* | ||
686 | * Generate PMC IRQs: | ||
687 | * (keep 'enabled' bit clear for now) | ||
688 | */ | ||
689 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
690 | |||
691 | /* | ||
692 | * Count user and OS events unless requested not to. | ||
693 | */ | ||
694 | if (!attr->exclude_user) | ||
695 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
696 | if (!attr->exclude_kernel) | ||
697 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
698 | |||
699 | if (!hwc->sample_period) { | ||
700 | hwc->sample_period = x86_pmu.max_period; | ||
701 | hwc->last_period = hwc->sample_period; | ||
702 | atomic64_set(&hwc->period_left, hwc->sample_period); | ||
703 | } | ||
704 | |||
705 | counter->destroy = hw_perf_counter_destroy; | ||
706 | |||
707 | /* | ||
708 | * Raw event type provide the config in the event structure | ||
709 | */ | ||
710 | if (attr->type == PERF_TYPE_RAW) { | ||
711 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
712 | return 0; | ||
713 | } | ||
714 | |||
715 | if (attr->type == PERF_TYPE_HW_CACHE) | ||
716 | return set_ext_hw_attr(hwc, attr); | ||
717 | |||
718 | if (attr->config >= x86_pmu.max_events) | ||
719 | return -EINVAL; | ||
720 | /* | ||
721 | * The generic map: | ||
722 | */ | ||
723 | hwc->config |= x86_pmu.event_map(attr->config); | ||
724 | |||
725 | return 0; | ||
726 | } | ||
727 | |||
728 | static void intel_pmu_disable_all(void) | ||
729 | { | ||
730 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
731 | } | ||
732 | |||
733 | static void amd_pmu_disable_all(void) | ||
734 | { | ||
735 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
736 | int idx; | ||
737 | |||
738 | if (!cpuc->enabled) | ||
739 | return; | ||
740 | |||
741 | cpuc->enabled = 0; | ||
742 | /* | ||
743 | * ensure we write the disable before we start disabling the | ||
744 | * counters proper, so that amd_pmu_enable_counter() does the | ||
745 | * right thing. | ||
746 | */ | ||
747 | barrier(); | ||
748 | |||
749 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
750 | u64 val; | ||
751 | |||
752 | if (!test_bit(idx, cpuc->active_mask)) | ||
753 | continue; | ||
754 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
755 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | ||
756 | continue; | ||
757 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
758 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
759 | } | ||
760 | } | ||
761 | |||
762 | void hw_perf_disable(void) | ||
763 | { | ||
764 | if (!x86_pmu_initialized()) | ||
765 | return; | ||
766 | return x86_pmu.disable_all(); | ||
767 | } | ||
768 | |||
769 | static void intel_pmu_enable_all(void) | ||
770 | { | ||
771 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
772 | } | ||
773 | |||
774 | static void amd_pmu_enable_all(void) | ||
775 | { | ||
776 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
777 | int idx; | ||
778 | |||
779 | if (cpuc->enabled) | ||
780 | return; | ||
781 | |||
782 | cpuc->enabled = 1; | ||
783 | barrier(); | ||
784 | |||
785 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
786 | u64 val; | ||
787 | |||
788 | if (!test_bit(idx, cpuc->active_mask)) | ||
789 | continue; | ||
790 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
791 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
792 | continue; | ||
793 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
794 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
795 | } | ||
796 | } | ||
797 | |||
798 | void hw_perf_enable(void) | ||
799 | { | ||
800 | if (!x86_pmu_initialized()) | ||
801 | return; | ||
802 | x86_pmu.enable_all(); | ||
803 | } | ||
804 | |||
805 | static inline u64 intel_pmu_get_status(void) | ||
806 | { | ||
807 | u64 status; | ||
808 | |||
809 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
810 | |||
811 | return status; | ||
812 | } | ||
813 | |||
814 | static inline void intel_pmu_ack_status(u64 ack) | ||
815 | { | ||
816 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
817 | } | ||
818 | |||
819 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
820 | { | ||
821 | int err; | ||
822 | err = checking_wrmsrl(hwc->config_base + idx, | ||
823 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
824 | } | ||
825 | |||
826 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
827 | { | ||
828 | int err; | ||
829 | err = checking_wrmsrl(hwc->config_base + idx, | ||
830 | hwc->config); | ||
831 | } | ||
832 | |||
833 | static inline void | ||
834 | intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
835 | { | ||
836 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
837 | u64 ctrl_val, mask; | ||
838 | int err; | ||
839 | |||
840 | mask = 0xfULL << (idx * 4); | ||
841 | |||
842 | rdmsrl(hwc->config_base, ctrl_val); | ||
843 | ctrl_val &= ~mask; | ||
844 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
845 | } | ||
846 | |||
847 | static inline void | ||
848 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
849 | { | ||
850 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
851 | intel_pmu_disable_fixed(hwc, idx); | ||
852 | return; | ||
853 | } | ||
854 | |||
855 | x86_pmu_disable_counter(hwc, idx); | ||
856 | } | ||
857 | |||
/* AMD per-counter disable: plain wrapper around the generic x86 helper. */
static inline void
amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
{
	x86_pmu_disable_counter(hwc, idx);
}
863 | |||
/*
 * Per-CPU record of the 'left' value last programmed for each counter
 * index; written by x86_perf_counter_set_period() and printed by
 * perf_counter_print_debug().
 */
864 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | ||
865 | |||
866 | /* | ||
867 | * Set the next IRQ period, based on the hwc->period_left value. | ||
868 | * To be called with the counter disabled in hw: | ||
 *
 * Returns 1 when period_left had run out and was restarted from
 * sample_period, 0 otherwise. The result of the MSR write is ignored.
869 | */ | ||
870 | static int | ||
871 | x86_perf_counter_set_period(struct perf_counter *counter, | ||
872 | struct hw_perf_counter *hwc, int idx) | ||
873 | { | ||
874 | s64 left = atomic64_read(&hwc->period_left); | ||
875 | s64 period = hwc->sample_period; | ||
876 | int err, ret = 0; | ||
877 | |||
878 | /* | ||
879 | * If we are way outside a reasonable range then just skip forward: | ||
880 | */ | ||
881 | if (unlikely(left <= -period)) { | ||
882 | left = period; | ||
883 | atomic64_set(&hwc->period_left, left); | ||
884 | hwc->last_period = period; | ||
885 | ret = 1; | ||
886 | } | ||
887 | |||
/* Period expired: carry the overshoot over into the next period. */
888 | if (unlikely(left <= 0)) { | ||
889 | left += period; | ||
890 | atomic64_set(&hwc->period_left, left); | ||
891 | hwc->last_period = period; | ||
892 | ret = 1; | ||
893 | } | ||
894 | /* | ||
895 | * Quirk: certain CPUs don't like it if just 1 event is left: | ||
896 | */ | ||
897 | if (unlikely(left < 2)) | ||
898 | left = 2; | ||
899 | |||
/* Never program more than the PMU's maximum period. */
900 | if (left > x86_pmu.max_period) | ||
901 | left = x86_pmu.max_period; | ||
902 | |||
/* Remembered for perf_counter_print_debug(). */
903 | per_cpu(prev_left[idx], smp_processor_id()) = left; | ||
904 | |||
905 | /* | ||
906 | * The hw counter starts counting from this counter offset, | ||
907 | * mark it to be able to extract future deltas: | ||
908 | */ | ||
909 | atomic64_set(&hwc->prev_count, (u64)-left); | ||
910 | |||
/* Program -left, truncated to the counter width via counter_mask. */
911 | err = checking_wrmsrl(hwc->counter_base + idx, | ||
912 | (u64)(-left) & x86_pmu.counter_mask); | ||
913 | |||
914 | return ret; | ||
915 | } | ||
916 | |||
917 | static inline void | ||
918 | intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
919 | { | ||
920 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
921 | u64 ctrl_val, bits, mask; | ||
922 | int err; | ||
923 | |||
924 | /* | ||
925 | * Enable IRQ generation (0x8), | ||
926 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
927 | * if requested: | ||
928 | */ | ||
929 | bits = 0x8ULL; | ||
930 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
931 | bits |= 0x2; | ||
932 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
933 | bits |= 0x1; | ||
934 | bits <<= (idx * 4); | ||
935 | mask = 0xfULL << (idx * 4); | ||
936 | |||
937 | rdmsrl(hwc->config_base, ctrl_val); | ||
938 | ctrl_val &= ~mask; | ||
939 | ctrl_val |= bits; | ||
940 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
941 | } | ||
942 | |||
943 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
944 | { | ||
945 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
946 | intel_pmu_enable_fixed(hwc, idx); | ||
947 | return; | ||
948 | } | ||
949 | |||
950 | x86_pmu_enable_counter(hwc, idx); | ||
951 | } | ||
952 | |||
953 | static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
954 | { | ||
955 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
956 | |||
957 | if (cpuc->enabled) | ||
958 | x86_pmu_enable_counter(hwc, idx); | ||
959 | else | ||
960 | x86_pmu_disable_counter(hwc, idx); | ||
961 | } | ||
962 | |||
963 | static int | ||
964 | fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | ||
965 | { | ||
966 | unsigned int event; | ||
967 | |||
968 | if (!x86_pmu.num_counters_fixed) | ||
969 | return -1; | ||
970 | |||
971 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
972 | |||
973 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
974 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
975 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
976 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
977 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
978 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
979 | |||
980 | return -1; | ||
981 | } | ||
982 | |||
983 | /* | ||
984 | * Find a PMC slot for the freshly enabled / scheduled in counter: | ||
 *
 * Prefers a matching fixed-function counter, then the generic counter
 * the event used last time (hwc->idx), then the first free generic one.
 * The chosen slot is programmed with the next period and enabled.
 *
 * Returns 0 on success, -EAGAIN when no generic counter is free.
985 | */ | ||
986 | static int x86_pmu_enable(struct perf_counter *counter) | ||
987 | { | ||
988 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
989 | struct hw_perf_counter *hwc = &counter->hw; | ||
990 | int idx; | ||
991 | |||
992 | idx = fixed_mode_idx(counter, hwc); | ||
993 | if (idx >= 0) { | ||
994 | /* | ||
995 | * Try to get the fixed counter, if that is already taken | ||
996 | * then try to get a generic counter: | ||
997 | */ | ||
998 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
999 | goto try_generic; | ||
1000 | |||
1001 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
1002 | /* | ||
1003 | * We set it so that counter_base + idx in wrmsr/rdmsr maps to | ||
1004 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1005 | */ | ||
1006 | hwc->counter_base = | ||
1007 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1008 | hwc->idx = idx; | ||
1009 | } else { | ||
1010 | idx = hwc->idx; | ||
1011 | /* Try to get the previous generic counter again */ | ||
1012 | if (test_and_set_bit(idx, cpuc->used_mask)) { | ||
/*
 * Also entered by the goto above when the fixed counter is taken; the
 * jump lands inside this block, so the generic config_base and
 * counter_base assignments after it still run.
 */
1013 | try_generic: | ||
1014 | idx = find_first_zero_bit(cpuc->used_mask, | ||
1015 | x86_pmu.num_counters); | ||
1016 | if (idx == x86_pmu.num_counters) | ||
1017 | return -EAGAIN; | ||
1018 | |||
1019 | set_bit(idx, cpuc->used_mask); | ||
1020 | hwc->idx = idx; | ||
1021 | } | ||
1022 | hwc->config_base = x86_pmu.eventsel; | ||
1023 | hwc->counter_base = x86_pmu.perfctr; | ||
1024 | } | ||
1025 | |||
1026 | perf_counters_lapic_init(); | ||
1027 | |||
/* Keep the counter disabled while it is (re)programmed. */
1028 | x86_pmu.disable(hwc, idx); | ||
1029 | |||
1030 | cpuc->counters[idx] = counter; | ||
1031 | set_bit(idx, cpuc->active_mask); | ||
1032 | |||
1033 | x86_perf_counter_set_period(counter, hwc, idx); | ||
1034 | x86_pmu.enable(hwc, idx); | ||
1035 | |||
1036 | return 0; | ||
1037 | } | ||
1038 | |||
1039 | static void x86_pmu_unthrottle(struct perf_counter *counter) | ||
1040 | { | ||
1041 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1042 | struct hw_perf_counter *hwc = &counter->hw; | ||
1043 | |||
1044 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1045 | cpuc->counters[hwc->idx] != counter)) | ||
1046 | return; | ||
1047 | |||
1048 | x86_pmu.enable(hwc, hwc->idx); | ||
1049 | } | ||
1050 | |||
1051 | void perf_counter_print_debug(void) | ||
1052 | { | ||
1053 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | ||
1054 | struct cpu_hw_counters *cpuc; | ||
1055 | unsigned long flags; | ||
1056 | int cpu, idx; | ||
1057 | |||
1058 | if (!x86_pmu.num_counters) | ||
1059 | return; | ||
1060 | |||
1061 | local_irq_save(flags); | ||
1062 | |||
1063 | cpu = smp_processor_id(); | ||
1064 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1065 | |||
1066 | if (x86_pmu.version >= 2) { | ||
1067 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
1068 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1069 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | ||
1070 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | ||
1071 | |||
1072 | pr_info("\n"); | ||
1073 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | ||
1074 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | ||
1075 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | ||
1076 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | ||
1077 | } | ||
1078 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | ||
1079 | |||
1080 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1081 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | ||
1082 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | ||
1083 | |||
1084 | prev_left = per_cpu(prev_left[idx], cpu); | ||
1085 | |||
1086 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | ||
1087 | cpu, idx, pmc_ctrl); | ||
1088 | pr_info("CPU#%d: gen-PMC%d count: %016llx\n", | ||
1089 | cpu, idx, pmc_count); | ||
1090 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | ||
1091 | cpu, idx, prev_left); | ||
1092 | } | ||
1093 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | ||
1094 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | ||
1095 | |||
1096 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | ||
1097 | cpu, idx, pmc_count); | ||
1098 | } | ||
1099 | local_irq_restore(flags); | ||
1100 | } | ||
1101 | |||
/*
 * Stop a counter on this CPU and give its slot back: mark it inactive,
 * disable it in hardware, fold the remaining count into the generic
 * counter, then clear the slot pointer and its used_mask bit.
 */
1102 | static void x86_pmu_disable(struct perf_counter *counter) | ||
1103 | { | ||
1104 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1105 | struct hw_perf_counter *hwc = &counter->hw; | ||
1106 | int idx = hwc->idx; | ||
1107 | |||
1108 | /* | ||
1109 | * Must be done before we disable, otherwise the nmi handler | ||
1110 | * could reenable again: | ||
1111 | */ | ||
1112 | clear_bit(idx, cpuc->active_mask); | ||
1113 | x86_pmu.disable(hwc, idx); | ||
1114 | |||
1115 | /* | ||
1116 | * Make sure the cleared pointer becomes visible before we | ||
1117 | * (potentially) free the counter: | ||
 *
 * NOTE(review): cpuc->counters[idx] is only set to NULL further
 * down, after this barrier - confirm the intended ordering.
1118 | */ | ||
1119 | barrier(); | ||
1120 | |||
1121 | /* | ||
1122 | * Drain the remaining delta count out of a counter | ||
1123 | * that we are disabling: | ||
1124 | */ | ||
1125 | x86_perf_counter_update(counter, hwc, idx); | ||
1126 | cpuc->counters[idx] = NULL; | ||
1127 | clear_bit(idx, cpuc->used_mask); | ||
1128 | } | ||
1129 | |||
1130 | /* | ||
1131 | * Save and restart an expired counter. Called by NMI contexts, | ||
1132 | * so it has to be careful about preempting normal counter ops: | ||
1133 | */ | ||
1134 | static int intel_pmu_save_and_restart(struct perf_counter *counter) | ||
1135 | { | ||
1136 | struct hw_perf_counter *hwc = &counter->hw; | ||
1137 | int idx = hwc->idx; | ||
1138 | int ret; | ||
1139 | |||
1140 | x86_perf_counter_update(counter, hwc, idx); | ||
1141 | ret = x86_perf_counter_set_period(counter, hwc, idx); | ||
1142 | |||
1143 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
1144 | intel_pmu_enable_counter(hwc, idx); | ||
1145 | |||
1146 | return ret; | ||
1147 | } | ||
1148 | |||
1149 | static void intel_pmu_reset(void) | ||
1150 | { | ||
1151 | unsigned long flags; | ||
1152 | int idx; | ||
1153 | |||
1154 | if (!x86_pmu.num_counters) | ||
1155 | return; | ||
1156 | |||
1157 | local_irq_save(flags); | ||
1158 | |||
1159 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1160 | |||
1161 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1162 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
1163 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1164 | } | ||
1165 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | ||
1166 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1167 | } | ||
1168 | |||
1169 | local_irq_restore(flags); | ||
1170 | } | ||
1171 | |||
1172 | |||
1173 | /* | ||
1174 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1175 | * rules apply: | ||
1176 | */ | ||
1177 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1178 | { | ||
1179 | struct perf_sample_data data; | ||
1180 | struct cpu_hw_counters *cpuc; | ||
1181 | int bit, cpu, loops; | ||
1182 | u64 ack, status; | ||
1183 | |||
1184 | data.regs = regs; | ||
1185 | data.addr = 0; | ||
1186 | |||
1187 | cpu = smp_processor_id(); | ||
1188 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1189 | |||
1190 | perf_disable(); | ||
1191 | status = intel_pmu_get_status(); | ||
1192 | if (!status) { | ||
1193 | perf_enable(); | ||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | loops = 0; | ||
1198 | again: | ||
1199 | if (++loops > 100) { | ||
1200 | WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); | ||
1201 | perf_counter_print_debug(); | ||
1202 | intel_pmu_reset(); | ||
1203 | perf_enable(); | ||
1204 | return 1; | ||
1205 | } | ||
1206 | |||
1207 | inc_irq_stat(apic_perf_irqs); | ||
1208 | ack = status; | ||
1209 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1210 | struct perf_counter *counter = cpuc->counters[bit]; | ||
1211 | |||
1212 | clear_bit(bit, (unsigned long *) &status); | ||
1213 | if (!test_bit(bit, cpuc->active_mask)) | ||
1214 | continue; | ||
1215 | |||
1216 | if (!intel_pmu_save_and_restart(counter)) | ||
1217 | continue; | ||
1218 | |||
1219 | if (perf_counter_overflow(counter, 1, &data)) | ||
1220 | intel_pmu_disable_counter(&counter->hw, bit); | ||
1221 | } | ||
1222 | |||
1223 | intel_pmu_ack_status(ack); | ||
1224 | |||
1225 | /* | ||
1226 | * Repeat if there is more work to be done: | ||
1227 | */ | ||
1228 | status = intel_pmu_get_status(); | ||
1229 | if (status) | ||
1230 | goto again; | ||
1231 | |||
1232 | perf_enable(); | ||
1233 | |||
1234 | return 1; | ||
1235 | } | ||
1236 | |||
1237 | static int amd_pmu_handle_irq(struct pt_regs *regs) | ||
1238 | { | ||
1239 | struct perf_sample_data data; | ||
1240 | struct cpu_hw_counters *cpuc; | ||
1241 | struct perf_counter *counter; | ||
1242 | struct hw_perf_counter *hwc; | ||
1243 | int cpu, idx, handled = 0; | ||
1244 | u64 val; | ||
1245 | |||
1246 | data.regs = regs; | ||
1247 | data.addr = 0; | ||
1248 | |||
1249 | cpu = smp_processor_id(); | ||
1250 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1251 | |||
1252 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1253 | if (!test_bit(idx, cpuc->active_mask)) | ||
1254 | continue; | ||
1255 | |||
1256 | counter = cpuc->counters[idx]; | ||
1257 | hwc = &counter->hw; | ||
1258 | |||
1259 | val = x86_perf_counter_update(counter, hwc, idx); | ||
1260 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | ||
1261 | continue; | ||
1262 | |||
1263 | /* | ||
1264 | * counter overflow | ||
1265 | */ | ||
1266 | handled = 1; | ||
1267 | data.period = counter->hw.last_period; | ||
1268 | |||
1269 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | ||
1270 | continue; | ||
1271 | |||
1272 | if (perf_counter_overflow(counter, 1, &data)) | ||
1273 | amd_pmu_disable_counter(hwc, idx); | ||
1274 | } | ||
1275 | |||
1276 | if (handled) | ||
1277 | inc_irq_stat(apic_perf_irqs); | ||
1278 | |||
1279 | return handled; | ||
1280 | } | ||
1281 | |||
1282 | void smp_perf_pending_interrupt(struct pt_regs *regs) | ||
1283 | { | ||
1284 | irq_enter(); | ||
1285 | ack_APIC_irq(); | ||
1286 | inc_irq_stat(apic_pending_irqs); | ||
1287 | perf_counter_do_pending(); | ||
1288 | irq_exit(); | ||
1289 | } | ||
1290 | |||
1291 | void set_perf_counter_pending(void) | ||
1292 | { | ||
1293 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | ||
1294 | } | ||
1295 | |||
1296 | void perf_counters_lapic_init(void) | ||
1297 | { | ||
1298 | if (!x86_pmu_initialized()) | ||
1299 | return; | ||
1300 | |||
1301 | /* | ||
1302 | * Always use NMI for PMU | ||
1303 | */ | ||
1304 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1305 | } | ||
1306 | |||
1307 | static int __kprobes | ||
1308 | perf_counter_nmi_handler(struct notifier_block *self, | ||
1309 | unsigned long cmd, void *__args) | ||
1310 | { | ||
1311 | struct die_args *args = __args; | ||
1312 | struct pt_regs *regs; | ||
1313 | |||
1314 | if (!atomic_read(&active_counters)) | ||
1315 | return NOTIFY_DONE; | ||
1316 | |||
1317 | switch (cmd) { | ||
1318 | case DIE_NMI: | ||
1319 | case DIE_NMI_IPI: | ||
1320 | break; | ||
1321 | |||
1322 | default: | ||
1323 | return NOTIFY_DONE; | ||
1324 | } | ||
1325 | |||
1326 | regs = args->regs; | ||
1327 | |||
1328 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1329 | /* | ||
1330 | * Can't rely on the handled return value to say it was our NMI, two | ||
1331 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | ||
1332 | * | ||
1333 | * If the first NMI handles both, the latter will be empty and daze | ||
1334 | * the CPU. | ||
1335 | */ | ||
1336 | x86_pmu.handle_irq(regs); | ||
1337 | |||
1338 | return NOTIFY_STOP; | ||
1339 | } | ||
1340 | |||
1341 | static __read_mostly struct notifier_block perf_counter_nmi_notifier = { | ||
1342 | .notifier_call = perf_counter_nmi_handler, | ||
1343 | .next = NULL, | ||
1344 | .priority = 1 | ||
1345 | }; | ||
1346 | |||
1347 | static struct x86_pmu intel_pmu = { | ||
1348 | .name = "Intel", | ||
1349 | .handle_irq = intel_pmu_handle_irq, | ||
1350 | .disable_all = intel_pmu_disable_all, | ||
1351 | .enable_all = intel_pmu_enable_all, | ||
1352 | .enable = intel_pmu_enable_counter, | ||
1353 | .disable = intel_pmu_disable_counter, | ||
1354 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
1355 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
1356 | .event_map = intel_pmu_event_map, | ||
1357 | .raw_event = intel_pmu_raw_event, | ||
1358 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
1359 | /* | ||
1360 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
1361 | * so we install an artificial 1<<31 period regardless of | ||
1362 | * the generic counter period: | ||
1363 | */ | ||
1364 | .max_period = (1ULL << 31) - 1, | ||
1365 | }; | ||
1366 | |||
1367 | static struct x86_pmu amd_pmu = { | ||
1368 | .name = "AMD", | ||
1369 | .handle_irq = amd_pmu_handle_irq, | ||
1370 | .disable_all = amd_pmu_disable_all, | ||
1371 | .enable_all = amd_pmu_enable_all, | ||
1372 | .enable = amd_pmu_enable_counter, | ||
1373 | .disable = amd_pmu_disable_counter, | ||
1374 | .eventsel = MSR_K7_EVNTSEL0, | ||
1375 | .perfctr = MSR_K7_PERFCTR0, | ||
1376 | .event_map = amd_pmu_event_map, | ||
1377 | .raw_event = amd_pmu_raw_event, | ||
1378 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
1379 | .num_counters = 4, | ||
1380 | .counter_bits = 48, | ||
1381 | .counter_mask = (1ULL << 48) - 1, | ||
1382 | /* use highest bit to detect overflow */ | ||
1383 | .max_period = (1ULL << 47) - 1, | ||
1384 | }; | ||
1385 | |||
1386 | static int intel_pmu_init(void) | ||
1387 | { | ||
1388 | union cpuid10_edx edx; | ||
1389 | union cpuid10_eax eax; | ||
1390 | unsigned int unused; | ||
1391 | unsigned int ebx; | ||
1392 | int version; | ||
1393 | |||
1394 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
1395 | return -ENODEV; | ||
1396 | |||
1397 | /* | ||
1398 | * Check whether the Architectural PerfMon supports | ||
1399 | * Branch Misses Retired Event or not. | ||
1400 | */ | ||
1401 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
1402 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
1403 | return -ENODEV; | ||
1404 | |||
1405 | version = eax.split.version_id; | ||
1406 | if (version < 2) | ||
1407 | return -ENODEV; | ||
1408 | |||
1409 | x86_pmu = intel_pmu; | ||
1410 | x86_pmu.version = version; | ||
1411 | x86_pmu.num_counters = eax.split.num_counters; | ||
1412 | x86_pmu.counter_bits = eax.split.bit_width; | ||
1413 | x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; | ||
1414 | |||
1415 | /* | ||
1416 | * Quirk: v2 perfmon does not report fixed-purpose counters, so | ||
1417 | * assume at least 3 counters: | ||
1418 | */ | ||
1419 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | ||
1420 | |||
1421 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1422 | |||
1423 | /* | ||
1424 | * Install the hw-cache-events table: | ||
1425 | */ | ||
1426 | switch (boot_cpu_data.x86_model) { | ||
1427 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
1428 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
1429 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
1430 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
1431 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
1432 | sizeof(hw_cache_event_ids)); | ||
1433 | |||
1434 | pr_cont("Core2 events, "); | ||
1435 | break; | ||
1436 | default: | ||
1437 | case 26: | ||
1438 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
1439 | sizeof(hw_cache_event_ids)); | ||
1440 | |||
1441 | pr_cont("Nehalem/Corei7 events, "); | ||
1442 | break; | ||
1443 | case 28: | ||
1444 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
1445 | sizeof(hw_cache_event_ids)); | ||
1446 | |||
1447 | pr_cont("Atom events, "); | ||
1448 | break; | ||
1449 | } | ||
1450 | return 0; | ||
1451 | } | ||
1452 | |||
1453 | static int amd_pmu_init(void) | ||
1454 | { | ||
1455 | x86_pmu = amd_pmu; | ||
1456 | |||
1457 | switch (boot_cpu_data.x86) { | ||
1458 | case 0x0f: | ||
1459 | case 0x10: | ||
1460 | case 0x11: | ||
1461 | memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, | ||
1462 | sizeof(hw_cache_event_ids)); | ||
1463 | |||
1464 | pr_cont("AMD Family 0f/10/11 events, "); | ||
1465 | break; | ||
1466 | } | ||
1467 | return 0; | ||
1468 | } | ||
1469 | |||
1470 | void __init init_hw_perf_counters(void) | ||
1471 | { | ||
1472 | int err; | ||
1473 | |||
1474 | pr_info("Performance Counters: "); | ||
1475 | |||
1476 | switch (boot_cpu_data.x86_vendor) { | ||
1477 | case X86_VENDOR_INTEL: | ||
1478 | err = intel_pmu_init(); | ||
1479 | break; | ||
1480 | case X86_VENDOR_AMD: | ||
1481 | err = amd_pmu_init(); | ||
1482 | break; | ||
1483 | default: | ||
1484 | return; | ||
1485 | } | ||
1486 | if (err != 0) { | ||
1487 | pr_cont("no PMU driver, software counters only.\n"); | ||
1488 | return; | ||
1489 | } | ||
1490 | |||
1491 | pr_cont("%s PMU driver.\n", x86_pmu.name); | ||
1492 | |||
1493 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1494 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1495 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | ||
1496 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | ||
1497 | } | ||
1498 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | ||
1499 | perf_max_counters = x86_pmu.num_counters; | ||
1500 | |||
1501 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1502 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1503 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | ||
1504 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | ||
1505 | } | ||
1506 | |||
1507 | perf_counter_mask |= | ||
1508 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1509 | |||
1510 | perf_counters_lapic_init(); | ||
1511 | register_die_notifier(&perf_counter_nmi_notifier); | ||
1512 | |||
1513 | pr_info("... version: %d\n", x86_pmu.version); | ||
1514 | pr_info("... bit width: %d\n", x86_pmu.counter_bits); | ||
1515 | pr_info("... generic counters: %d\n", x86_pmu.num_counters); | ||
1516 | pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); | ||
1517 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | ||
1518 | pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); | ||
1519 | pr_info("... counter mask: %016Lx\n", perf_counter_mask); | ||
1520 | } | ||
1521 | |||
1522 | static inline void x86_pmu_read(struct perf_counter *counter) | ||
1523 | { | ||
1524 | x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); | ||
1525 | } | ||
1526 | |||
1527 | static const struct pmu pmu = { | ||
1528 | .enable = x86_pmu_enable, | ||
1529 | .disable = x86_pmu_disable, | ||
1530 | .read = x86_pmu_read, | ||
1531 | .unthrottle = x86_pmu_unthrottle, | ||
1532 | }; | ||
1533 | |||
1534 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
1535 | { | ||
1536 | int err; | ||
1537 | |||
1538 | err = __hw_perf_counter_init(counter); | ||
1539 | if (err) | ||
1540 | return ERR_PTR(err); | ||
1541 | |||
1542 | return &pmu; | ||
1543 | } | ||
1544 | |||
1545 | /* | ||
1546 | * callchain support | ||
1547 | */ | ||
1548 | |||
1549 | static inline | ||
1550 | void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) | ||
1551 | { | ||
1552 | if (entry->nr < MAX_STACK_DEPTH) | ||
1553 | entry->ip[entry->nr++] = ip; | ||
1554 | } | ||
1555 | |||
1556 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | ||
1557 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | ||
1558 | |||
1559 | |||
1560 | static void | ||
1561 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
1562 | { | ||
1563 | /* Ignore warnings */ | ||
1564 | } | ||
1565 | |||
1566 | static void backtrace_warning(void *data, char *msg) | ||
1567 | { | ||
1568 | /* Ignore warnings */ | ||
1569 | } | ||
1570 | |||
1571 | static int backtrace_stack(void *data, char *name) | ||
1572 | { | ||
1573 | /* Don't bother with IRQ stacks for now */ | ||
1574 | return -1; | ||
1575 | } | ||
1576 | |||
1577 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
1578 | { | ||
1579 | struct perf_callchain_entry *entry = data; | ||
1580 | |||
1581 | if (reliable) | ||
1582 | callchain_store(entry, addr); | ||
1583 | } | ||
1584 | |||
1585 | static const struct stacktrace_ops backtrace_ops = { | ||
1586 | .warning = backtrace_warning, | ||
1587 | .warning_symbol = backtrace_warning_symbol, | ||
1588 | .stack = backtrace_stack, | ||
1589 | .address = backtrace_address, | ||
1590 | }; | ||
1591 | |||
1592 | static void | ||
1593 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1594 | { | ||
1595 | unsigned long bp; | ||
1596 | char *stack; | ||
1597 | int nr = entry->nr; | ||
1598 | |||
1599 | callchain_store(entry, instruction_pointer(regs)); | ||
1600 | |||
1601 | stack = ((char *)regs + sizeof(struct pt_regs)); | ||
1602 | #ifdef CONFIG_FRAME_POINTER | ||
1603 | bp = frame_pointer(regs); | ||
1604 | #else | ||
1605 | bp = 0; | ||
1606 | #endif | ||
1607 | |||
1608 | dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); | ||
1609 | |||
1610 | entry->kernel = entry->nr - nr; | ||
1611 | } | ||
1612 | |||
1613 | |||
1614 | struct stack_frame { | ||
1615 | const void __user *next_fp; | ||
1616 | unsigned long return_address; | ||
1617 | }; | ||
1618 | |||
1619 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
1620 | { | ||
1621 | int ret; | ||
1622 | |||
1623 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
1624 | return 0; | ||
1625 | |||
1626 | ret = 1; | ||
1627 | pagefault_disable(); | ||
1628 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
1629 | ret = 0; | ||
1630 | pagefault_enable(); | ||
1631 | |||
1632 | return ret; | ||
1633 | } | ||
1634 | |||
1635 | static void | ||
1636 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1637 | { | ||
1638 | struct stack_frame frame; | ||
1639 | const void __user *fp; | ||
1640 | int nr = entry->nr; | ||
1641 | |||
1642 | regs = (struct pt_regs *)current->thread.sp0 - 1; | ||
1643 | fp = (void __user *)regs->bp; | ||
1644 | |||
1645 | callchain_store(entry, regs->ip); | ||
1646 | |||
1647 | while (entry->nr < MAX_STACK_DEPTH) { | ||
1648 | frame.next_fp = NULL; | ||
1649 | frame.return_address = 0; | ||
1650 | |||
1651 | if (!copy_stack_frame(fp, &frame)) | ||
1652 | break; | ||
1653 | |||
1654 | if ((unsigned long)fp < user_stack_pointer(regs)) | ||
1655 | break; | ||
1656 | |||
1657 | callchain_store(entry, frame.return_address); | ||
1658 | fp = frame.next_fp; | ||
1659 | } | ||
1660 | |||
1661 | entry->user = entry->nr - nr; | ||
1662 | } | ||
1663 | |||
1664 | static void | ||
1665 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1666 | { | ||
1667 | int is_user; | ||
1668 | |||
1669 | if (!regs) | ||
1670 | return; | ||
1671 | |||
1672 | is_user = user_mode(regs); | ||
1673 | |||
1674 | if (!current || current->pid == 0) | ||
1675 | return; | ||
1676 | |||
1677 | if (is_user && current->state != TASK_RUNNING) | ||
1678 | return; | ||
1679 | |||
1680 | if (!is_user) | ||
1681 | perf_callchain_kernel(regs, entry); | ||
1682 | |||
1683 | if (current->mm) | ||
1684 | perf_callchain_user(regs, entry); | ||
1685 | } | ||
1686 | |||
1687 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1688 | { | ||
1689 | struct perf_callchain_entry *entry; | ||
1690 | |||
1691 | if (in_nmi()) | ||
1692 | entry = &__get_cpu_var(nmi_entry); | ||
1693 | else | ||
1694 | entry = &__get_cpu_var(irq_entry); | ||
1695 | |||
1696 | entry->nr = 0; | ||
1697 | entry->hv = 0; | ||
1698 | entry->kernel = 0; | ||
1699 | entry->user = 0; | ||
1700 | |||
1701 | perf_do_callchain(regs, entry); | ||
1702 | |||
1703 | return entry; | ||
1704 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f6c70a164e32..d6f5b9fbde32 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -19,8 +19,8 @@ | |||
19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/genapic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/perf_counter.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
26 | unsigned int cccr_msr; | 26 | unsigned int cccr_msr; |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1c17d7c751a4..a4742a340d8d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -1012,6 +1012,11 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1012 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1012 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1013 | spurious_interrupt smp_spurious_interrupt | 1013 | spurious_interrupt smp_spurious_interrupt |
1014 | 1014 | ||
1015 | #ifdef CONFIG_PERF_COUNTERS | ||
1016 | apicinterrupt LOCAL_PENDING_VECTOR \ | ||
1017 | perf_pending_interrupt smp_perf_pending_interrupt | ||
1018 | #endif | ||
1019 | |||
1015 | /* | 1020 | /* |
1016 | * Exception entry points. | 1021 | * Exception entry points. |
1017 | */ | 1022 | */ |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 9a391bbb8ba8..38287b5f116e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -62,6 +62,14 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
62 | for_each_online_cpu(j) | 62 | for_each_online_cpu(j) |
63 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | 63 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); |
64 | seq_printf(p, " Spurious interrupts\n"); | 64 | seq_printf(p, " Spurious interrupts\n"); |
65 | seq_printf(p, "%*s: ", prec, "CNT"); | ||
66 | for_each_online_cpu(j) | ||
67 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | ||
68 | seq_printf(p, " Performance counter interrupts\n"); | ||
69 | seq_printf(p, "%*s: ", prec, "PND"); | ||
70 | for_each_online_cpu(j) | ||
71 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | ||
72 | seq_printf(p, " Performance pending work\n"); | ||
65 | #endif | 73 | #endif |
66 | if (generic_interrupt_extension) { | 74 | if (generic_interrupt_extension) { |
67 | seq_printf(p, "%*s: ", prec, "PLT"); | 75 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -165,6 +173,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
165 | #ifdef CONFIG_X86_LOCAL_APIC | 173 | #ifdef CONFIG_X86_LOCAL_APIC |
166 | sum += irq_stats(cpu)->apic_timer_irqs; | 174 | sum += irq_stats(cpu)->apic_timer_irqs; |
167 | sum += irq_stats(cpu)->irq_spurious_count; | 175 | sum += irq_stats(cpu)->irq_spurious_count; |
176 | sum += irq_stats(cpu)->apic_perf_irqs; | ||
177 | sum += irq_stats(cpu)->apic_pending_irqs; | ||
168 | #endif | 178 | #endif |
169 | if (generic_interrupt_extension) | 179 | if (generic_interrupt_extension) |
170 | sum += irq_stats(cpu)->generic_irqs; | 180 | sum += irq_stats(cpu)->generic_irqs; |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 2e08b10ad51a..267c6624c77f 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -181,10 +181,15 @@ static void __init apic_intr_init(void) | |||
181 | { | 181 | { |
182 | smp_intr_init(); | 182 | smp_intr_init(); |
183 | 183 | ||
184 | #ifdef CONFIG_X86_64 | 184 | #ifdef CONFIG_X86_THERMAL_VECTOR |
185 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 185 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
186 | #endif | ||
187 | #ifdef CONFIG_X86_THRESHOLD | ||
186 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 188 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
187 | #endif | 189 | #endif |
190 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | ||
191 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | ||
192 | #endif | ||
188 | 193 | ||
189 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 194 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
190 | /* self generated IPI for local APIC timer */ | 195 | /* self generated IPI for local APIC timer */ |
@@ -199,18 +204,10 @@ static void __init apic_intr_init(void) | |||
199 | 204 | ||
200 | /* Performance monitoring interrupts: */ | 205 | /* Performance monitoring interrupts: */ |
201 | # ifdef CONFIG_PERF_COUNTERS | 206 | # ifdef CONFIG_PERF_COUNTERS |
202 | alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); | ||
203 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | 207 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); |
204 | # endif | 208 | # endif |
205 | 209 | ||
206 | #endif | 210 | #endif |
207 | |||
208 | #ifdef CONFIG_X86_32 | ||
209 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) | ||
210 | /* thermal monitor LVT interrupt */ | ||
211 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
212 | #endif | ||
213 | #endif | ||
214 | } | 211 | } |
215 | 212 | ||
216 | /** | 213 | /** |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..0a813b17b172 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 734f92c02dde..d51321ddafda 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -335,3 +335,4 @@ ENTRY(sys_call_table) | |||
335 | .long sys_preadv | 335 | .long sys_preadv |
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_counter_open | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ede024531f8f..07d60c870ce2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -942,8 +942,13 @@ void __init trap_init(void) | |||
942 | #endif | 942 | #endif |
943 | set_intr_gate(19, &simd_coprocessor_error); | 943 | set_intr_gate(19, &simd_coprocessor_error); |
944 | 944 | ||
945 | /* Reserve all the builtin and the syscall vector: */ | ||
946 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
947 | set_bit(i, used_vectors); | ||
948 | |||
945 | #ifdef CONFIG_IA32_EMULATION | 949 | #ifdef CONFIG_IA32_EMULATION |
946 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 950 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
951 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
947 | #endif | 952 | #endif |
948 | 953 | ||
949 | #ifdef CONFIG_X86_32 | 954 | #ifdef CONFIG_X86_32 |
@@ -960,14 +965,9 @@ void __init trap_init(void) | |||
960 | } | 965 | } |
961 | 966 | ||
962 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 967 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
968 | set_bit(SYSCALL_VECTOR, used_vectors); | ||
963 | #endif | 969 | #endif |
964 | 970 | ||
965 | /* Reserve all the builtin and the syscall vector: */ | ||
966 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
967 | set_bit(i, used_vectors); | ||
968 | |||
969 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
970 | |||
971 | /* | 971 | /* |
972 | * Should be a barrier for any external CPU state: | 972 | * Should be a barrier for any external CPU state: |
973 | */ | 973 | */ |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 5ec7ae366615..c6acc6326374 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/bootmem.h> /* max_low_pfn */ | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> /* __kprobes, ... */ | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/perf_counter.h> /* perf_swcounter_event */ | ||
13 | 14 | ||
14 | #include <asm/traps.h> /* dotraplinkage, ... */ | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
15 | #include <asm/pgalloc.h> /* pgd_*(), ... */ | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
@@ -1013,6 +1014,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1013 | if (unlikely(error_code & PF_RSVD)) | 1014 | if (unlikely(error_code & PF_RSVD)) |
1014 | pgtable_bad(regs, error_code, address); | 1015 | pgtable_bad(regs, error_code, address); |
1015 | 1016 | ||
1017 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | ||
1018 | |||
1016 | /* | 1019 | /* |
1017 | * If we're in an interrupt, have no user context or are running | 1020 | * If we're in an interrupt, have no user context or are running |
1018 | * in an atomic region then we must not take the fault: | 1021 | * in an atomic region then we must not take the fault: |
@@ -1106,10 +1109,15 @@ good_area: | |||
1106 | return; | 1109 | return; |
1107 | } | 1110 | } |
1108 | 1111 | ||
1109 | if (fault & VM_FAULT_MAJOR) | 1112 | if (fault & VM_FAULT_MAJOR) { |
1110 | tsk->maj_flt++; | 1113 | tsk->maj_flt++; |
1111 | else | 1114 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, |
1115 | regs, address); | ||
1116 | } else { | ||
1112 | tsk->min_flt++; | 1117 | tsk->min_flt++; |
1118 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | ||
1119 | regs, address); | ||
1120 | } | ||
1113 | 1121 | ||
1114 | check_v8086_mode(regs, address, tsk); | 1122 | check_v8086_mode(regs, address, tsk); |
1115 | 1123 | ||
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index c0bedcd10f97..18d244f70205 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -40,21 +40,20 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
40 | 40 | ||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) |
42 | { | 42 | { |
43 | u64 *p; | 43 | u64 *p, *start, *end; |
44 | void *start, *end; | ||
45 | u64 start_bad, last_bad; | 44 | u64 start_bad, last_bad; |
46 | u64 start_phys_aligned; | 45 | u64 start_phys_aligned; |
47 | size_t incr; | 46 | const size_t incr = sizeof(pattern); |
48 | 47 | ||
49 | incr = sizeof(pattern); | ||
50 | start_phys_aligned = ALIGN(start_phys, incr); | 48 | start_phys_aligned = ALIGN(start_phys, incr); |
51 | start = __va(start_phys_aligned); | 49 | start = __va(start_phys_aligned); |
52 | end = start + size - (start_phys_aligned - start_phys); | 50 | end = start + (size - (start_phys_aligned - start_phys)) / incr; |
53 | start_bad = 0; | 51 | start_bad = 0; |
54 | last_bad = 0; | 52 | last_bad = 0; |
55 | 53 | ||
56 | for (p = start; p < end; p++) | 54 | for (p = start; p < end; p++) |
57 | *p = pattern; | 55 | *p = pattern; |
56 | |||
58 | for (p = start; p < end; p++, start_phys_aligned += incr) { | 57 | for (p = start; p < end; p++, start_phys_aligned += incr) { |
59 | if (*p == pattern) | 58 | if (*p == pattern) |
60 | continue; | 59 | continue; |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3b285e656e27..b07dd8d0b321 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
40 | 40 | ||
41 | switch (val) { | 41 | switch (val) { |
42 | case DIE_NMI: | 42 | case DIE_NMI: |
43 | if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) | 43 | case DIE_NMI_IPI: |
44 | ret = NOTIFY_STOP; | 44 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); |
45 | ret = NOTIFY_STOP; | ||
45 | break; | 46 | break; |
46 | default: | 47 | default: |
47 | break; | 48 | break; |
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) | |||
134 | static struct notifier_block profile_exceptions_nb = { | 135 | static struct notifier_block profile_exceptions_nb = { |
135 | .notifier_call = profile_exceptions_notify, | 136 | .notifier_call = profile_exceptions_notify, |
136 | .next = NULL, | 137 | .next = NULL, |
137 | .priority = 0 | 138 | .priority = 2 |
138 | }; | 139 | }; |
139 | 140 | ||
140 | static int nmi_setup(void) | 141 | static int nmi_setup(void) |
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fbdaada..4da7230b3d17 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
19 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
20 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | 21 | #include <asm/perf_counter.h> |
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
136 | u64 val; | 136 | u64 val; |
137 | int i; | 137 | int i; |
138 | 138 | ||
139 | /* | ||
140 | * This can happen if perf counters are in use when | ||
141 | * we steal the die notifier NMI. | ||
142 | */ | ||
143 | if (unlikely(!reset_value)) | ||
144 | goto out; | ||
145 | |||
139 | for (i = 0 ; i < num_counters; ++i) { | 146 | for (i = 0 ; i < num_counters; ++i) { |
140 | if (!reset_value[i]) | 147 | if (!reset_value[i]) |
141 | continue; | 148 | continue; |
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
146 | } | 153 | } |
147 | } | 154 | } |
148 | 155 | ||
156 | out: | ||
149 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | 157 | /* Only P6 based Pentium M need to re-unmask the apic vector but it |
150 | * doesn't hurt other P6 variant */ | 158 | * doesn't hurt other P6 variant */ |
151 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | 159 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 1241f118ab56..58bc00f68b12 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
338 | } | 338 | } |
339 | } | 339 | } |
340 | 340 | ||
341 | current->mm->context.vdso = (void *)addr; | ||
342 | |||
341 | if (compat_uses_vma || !compat) { | 343 | if (compat_uses_vma || !compat) { |
342 | /* | 344 | /* |
343 | * MAYWRITE to allow gdb to COW and set breakpoints | 345 | * MAYWRITE to allow gdb to COW and set breakpoints |
@@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
358 | goto up_fail; | 360 | goto up_fail; |
359 | } | 361 | } |
360 | 362 | ||
361 | current->mm->context.vdso = (void *)addr; | ||
362 | current_thread_info()->sysenter_return = | 363 | current_thread_info()->sysenter_return = |
363 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); | 364 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); |
364 | 365 | ||
365 | up_fail: | 366 | up_fail: |
367 | if (ret) | ||
368 | current->mm->context.vdso = NULL; | ||
369 | |||
366 | up_write(&mm->mmap_sem); | 370 | up_write(&mm->mmap_sem); |
367 | 371 | ||
368 | return ret; | 372 | return ret; |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index cac083386e03..21e1aeb9f3ea 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -116,15 +116,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
116 | goto up_fail; | 116 | goto up_fail; |
117 | } | 117 | } |
118 | 118 | ||
119 | current->mm->context.vdso = (void *)addr; | ||
120 | |||
119 | ret = install_special_mapping(mm, addr, vdso_size, | 121 | ret = install_special_mapping(mm, addr, vdso_size, |
120 | VM_READ|VM_EXEC| | 122 | VM_READ|VM_EXEC| |
121 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 123 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| |
122 | VM_ALWAYSDUMP, | 124 | VM_ALWAYSDUMP, |
123 | vdso_pages); | 125 | vdso_pages); |
124 | if (ret) | 126 | if (ret) { |
127 | current->mm->context.vdso = NULL; | ||
125 | goto up_fail; | 128 | goto up_fail; |
129 | } | ||
126 | 130 | ||
127 | current->mm->context.vdso = (void *)addr; | ||
128 | up_fail: | 131 | up_fail: |
129 | up_write(&mm->mmap_sem); | 132 | up_write(&mm->mmap_sem); |
130 | return ret; | 133 | return ret; |
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 6b91c26a4635..15a23031833f 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c | |||
@@ -77,8 +77,6 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, | |||
77 | size_t size); | 77 | size_t size); |
78 | static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, | 78 | static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, |
79 | ssize_t size); | 79 | ssize_t size); |
80 | #define MAX_SLOTS 8 | ||
81 | #define MAX_RETRY 15 | ||
82 | 80 | ||
83 | enum { | 81 | enum { |
84 | AHCI_PCI_BAR = 5, | 82 | AHCI_PCI_BAR = 5, |
@@ -231,6 +229,10 @@ enum { | |||
231 | 229 | ||
232 | ICH_MAP = 0x90, /* ICH MAP register */ | 230 | ICH_MAP = 0x90, /* ICH MAP register */ |
233 | 231 | ||
232 | /* em constants */ | ||
233 | EM_MAX_SLOTS = 8, | ||
234 | EM_MAX_RETRY = 5, | ||
235 | |||
234 | /* em_ctl bits */ | 236 | /* em_ctl bits */ |
235 | EM_CTL_RST = (1 << 9), /* Reset */ | 237 | EM_CTL_RST = (1 << 9), /* Reset */ |
236 | EM_CTL_TM = (1 << 8), /* Transmit Message */ | 238 | EM_CTL_TM = (1 << 8), /* Transmit Message */ |
@@ -282,8 +284,8 @@ struct ahci_port_priv { | |||
282 | unsigned int ncq_saw_dmas:1; | 284 | unsigned int ncq_saw_dmas:1; |
283 | unsigned int ncq_saw_sdb:1; | 285 | unsigned int ncq_saw_sdb:1; |
284 | u32 intr_mask; /* interrupts to enable */ | 286 | u32 intr_mask; /* interrupts to enable */ |
285 | struct ahci_em_priv em_priv[MAX_SLOTS];/* enclosure management info | 287 | /* enclosure management info per PM slot */ |
286 | * per PM slot */ | 288 | struct ahci_em_priv em_priv[EM_MAX_SLOTS]; |
287 | }; | 289 | }; |
288 | 290 | ||
289 | static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); | 291 | static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); |
@@ -313,7 +315,6 @@ static void ahci_error_handler(struct ata_port *ap); | |||
313 | static void ahci_post_internal_cmd(struct ata_queued_cmd *qc); | 315 | static void ahci_post_internal_cmd(struct ata_queued_cmd *qc); |
314 | static int ahci_port_resume(struct ata_port *ap); | 316 | static int ahci_port_resume(struct ata_port *ap); |
315 | static void ahci_dev_config(struct ata_device *dev); | 317 | static void ahci_dev_config(struct ata_device *dev); |
316 | static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl); | ||
317 | static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, | 318 | static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, |
318 | u32 opts); | 319 | u32 opts); |
319 | #ifdef CONFIG_PM | 320 | #ifdef CONFIG_PM |
@@ -404,14 +405,14 @@ static struct ata_port_operations ahci_sb600_ops = { | |||
404 | #define AHCI_HFLAGS(flags) .private_data = (void *)(flags) | 405 | #define AHCI_HFLAGS(flags) .private_data = (void *)(flags) |
405 | 406 | ||
406 | static const struct ata_port_info ahci_port_info[] = { | 407 | static const struct ata_port_info ahci_port_info[] = { |
407 | /* board_ahci */ | 408 | [board_ahci] = |
408 | { | 409 | { |
409 | .flags = AHCI_FLAG_COMMON, | 410 | .flags = AHCI_FLAG_COMMON, |
410 | .pio_mask = ATA_PIO4, | 411 | .pio_mask = ATA_PIO4, |
411 | .udma_mask = ATA_UDMA6, | 412 | .udma_mask = ATA_UDMA6, |
412 | .port_ops = &ahci_ops, | 413 | .port_ops = &ahci_ops, |
413 | }, | 414 | }, |
414 | /* board_ahci_vt8251 */ | 415 | [board_ahci_vt8251] = |
415 | { | 416 | { |
416 | AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP), | 417 | AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP), |
417 | .flags = AHCI_FLAG_COMMON, | 418 | .flags = AHCI_FLAG_COMMON, |
@@ -419,7 +420,7 @@ static const struct ata_port_info ahci_port_info[] = { | |||
419 | .udma_mask = ATA_UDMA6, | 420 | .udma_mask = ATA_UDMA6, |
420 | .port_ops = &ahci_vt8251_ops, | 421 | .port_ops = &ahci_vt8251_ops, |
421 | }, | 422 | }, |
422 | /* board_ahci_ign_iferr */ | 423 | [board_ahci_ign_iferr] = |
423 | { | 424 | { |
424 | AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), | 425 | AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), |
425 | .flags = AHCI_FLAG_COMMON, | 426 | .flags = AHCI_FLAG_COMMON, |
@@ -427,17 +428,16 @@ static const struct ata_port_info ahci_port_info[] = { | |||
427 | .udma_mask = ATA_UDMA6, | 428 | .udma_mask = ATA_UDMA6, |
428 | .port_ops = &ahci_ops, | 429 | .port_ops = &ahci_ops, |
429 | }, | 430 | }, |
430 | /* board_ahci_sb600 */ | 431 | [board_ahci_sb600] = |
431 | { | 432 | { |
432 | AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL | | 433 | AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL | |
433 | AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | | 434 | AHCI_HFLAG_NO_MSI | AHCI_HFLAG_SECT255), |
434 | AHCI_HFLAG_SECT255), | ||
435 | .flags = AHCI_FLAG_COMMON, | 435 | .flags = AHCI_FLAG_COMMON, |
436 | .pio_mask = ATA_PIO4, | 436 | .pio_mask = ATA_PIO4, |
437 | .udma_mask = ATA_UDMA6, | 437 | .udma_mask = ATA_UDMA6, |
438 | .port_ops = &ahci_sb600_ops, | 438 | .port_ops = &ahci_sb600_ops, |
439 | }, | 439 | }, |
440 | /* board_ahci_mv */ | 440 | [board_ahci_mv] = |
441 | { | 441 | { |
442 | AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI | | 442 | AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI | |
443 | AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP), | 443 | AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP), |
@@ -447,7 +447,7 @@ static const struct ata_port_info ahci_port_info[] = { | |||
447 | .udma_mask = ATA_UDMA6, | 447 | .udma_mask = ATA_UDMA6, |
448 | .port_ops = &ahci_ops, | 448 | .port_ops = &ahci_ops, |
449 | }, | 449 | }, |
450 | /* board_ahci_sb700, for SB700 and SB800 */ | 450 | [board_ahci_sb700] = /* for SB700 and SB800 */ |
451 | { | 451 | { |
452 | AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL), | 452 | AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL), |
453 | .flags = AHCI_FLAG_COMMON, | 453 | .flags = AHCI_FLAG_COMMON, |
@@ -455,7 +455,7 @@ static const struct ata_port_info ahci_port_info[] = { | |||
455 | .udma_mask = ATA_UDMA6, | 455 | .udma_mask = ATA_UDMA6, |
456 | .port_ops = &ahci_sb600_ops, | 456 | .port_ops = &ahci_sb600_ops, |
457 | }, | 457 | }, |
458 | /* board_ahci_mcp65 */ | 458 | [board_ahci_mcp65] = |
459 | { | 459 | { |
460 | AHCI_HFLAGS (AHCI_HFLAG_YES_NCQ), | 460 | AHCI_HFLAGS (AHCI_HFLAG_YES_NCQ), |
461 | .flags = AHCI_FLAG_COMMON, | 461 | .flags = AHCI_FLAG_COMMON, |
@@ -463,7 +463,7 @@ static const struct ata_port_info ahci_port_info[] = { | |||
463 | .udma_mask = ATA_UDMA6, | 463 | .udma_mask = ATA_UDMA6, |
464 | .port_ops = &ahci_ops, | 464 | .port_ops = &ahci_ops, |
465 | }, | 465 | }, |
466 | /* board_ahci_nopmp */ | 466 | [board_ahci_nopmp] = |
467 | { | 467 | { |
468 | AHCI_HFLAGS (AHCI_HFLAG_NO_PMP), | 468 | AHCI_HFLAGS (AHCI_HFLAG_NO_PMP), |
469 | .flags = AHCI_FLAG_COMMON, | 469 | .flags = AHCI_FLAG_COMMON, |
@@ -1141,12 +1141,12 @@ static void ahci_start_port(struct ata_port *ap) | |||
1141 | emp = &pp->em_priv[link->pmp]; | 1141 | emp = &pp->em_priv[link->pmp]; |
1142 | 1142 | ||
1143 | /* EM Transmit bit maybe busy during init */ | 1143 | /* EM Transmit bit maybe busy during init */ |
1144 | for (i = 0; i < MAX_RETRY; i++) { | 1144 | for (i = 0; i < EM_MAX_RETRY; i++) { |
1145 | rc = ahci_transmit_led_message(ap, | 1145 | rc = ahci_transmit_led_message(ap, |
1146 | emp->led_state, | 1146 | emp->led_state, |
1147 | 4); | 1147 | 4); |
1148 | if (rc == -EBUSY) | 1148 | if (rc == -EBUSY) |
1149 | udelay(100); | 1149 | msleep(1); |
1150 | else | 1150 | else |
1151 | break; | 1151 | break; |
1152 | } | 1152 | } |
@@ -1340,7 +1340,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, | |||
1340 | 1340 | ||
1341 | /* get the slot number from the message */ | 1341 | /* get the slot number from the message */ |
1342 | pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; | 1342 | pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; |
1343 | if (pmp < MAX_SLOTS) | 1343 | if (pmp < EM_MAX_SLOTS) |
1344 | emp = &pp->em_priv[pmp]; | 1344 | emp = &pp->em_priv[pmp]; |
1345 | else | 1345 | else |
1346 | return -EINVAL; | 1346 | return -EINVAL; |
@@ -1408,7 +1408,7 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, | |||
1408 | 1408 | ||
1409 | /* get the slot number from the message */ | 1409 | /* get the slot number from the message */ |
1410 | pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; | 1410 | pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; |
1411 | if (pmp < MAX_SLOTS) | 1411 | if (pmp < EM_MAX_SLOTS) |
1412 | emp = &pp->em_priv[pmp]; | 1412 | emp = &pp->em_priv[pmp]; |
1413 | else | 1413 | else |
1414 | return -EINVAL; | 1414 | return -EINVAL; |
@@ -2584,6 +2584,51 @@ static void ahci_p5wdh_workaround(struct ata_host *host) | |||
2584 | } | 2584 | } |
2585 | } | 2585 | } |
2586 | 2586 | ||
2587 | /* | ||
2588 | * SB600 ahci controller on ASUS M2A-VM can't do 64bit DMA with older | ||
2589 | * BIOS. The oldest version known to be broken is 0901 and working is | ||
2590 | * 1501 which was released on 2007-10-26. Force 32bit DMA on anything | ||
2591 | * older than 1501. Please read bko#9412 for more info. | ||
2592 | */ | ||
2593 | static bool ahci_asus_m2a_vm_32bit_only(struct pci_dev *pdev) | ||
2594 | { | ||
2595 | static const struct dmi_system_id sysids[] = { | ||
2596 | { | ||
2597 | .ident = "ASUS M2A-VM", | ||
2598 | .matches = { | ||
2599 | DMI_MATCH(DMI_BOARD_VENDOR, | ||
2600 | "ASUSTeK Computer INC."), | ||
2601 | DMI_MATCH(DMI_BOARD_NAME, "M2A-VM"), | ||
2602 | }, | ||
2603 | }, | ||
2604 | { } | ||
2605 | }; | ||
2606 | const char *cutoff_mmdd = "10/26"; | ||
2607 | const char *date; | ||
2608 | int year; | ||
2609 | |||
2610 | if (pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x12, 0) || | ||
2611 | !dmi_check_system(sysids)) | ||
2612 | return false; | ||
2613 | |||
2614 | /* | ||
2615 | * Argh.... both version and date are free form strings. | ||
2616 | * Let's hope they're using the same date format across | ||
2617 | * different versions. | ||
2618 | */ | ||
2619 | date = dmi_get_system_info(DMI_BIOS_DATE); | ||
2620 | year = dmi_get_year(DMI_BIOS_DATE); | ||
2621 | if (date && strlen(date) >= 10 && date[2] == '/' && date[5] == '/' && | ||
2622 | (year > 2007 || | ||
2623 | (year == 2007 && strncmp(date, cutoff_mmdd, 5) >= 0))) | ||
2624 | return false; | ||
2625 | |||
2626 | dev_printk(KERN_WARNING, &pdev->dev, "ASUS M2A-VM: BIOS too old, " | ||
2627 | "forcing 32bit DMA, update BIOS\n"); | ||
2628 | |||
2629 | return true; | ||
2630 | } | ||
2631 | |||
2587 | static bool ahci_broken_system_poweroff(struct pci_dev *pdev) | 2632 | static bool ahci_broken_system_poweroff(struct pci_dev *pdev) |
2588 | { | 2633 | { |
2589 | static const struct dmi_system_id broken_systems[] = { | 2634 | static const struct dmi_system_id broken_systems[] = { |
@@ -2744,6 +2789,10 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) | |||
2744 | if (board_id == board_ahci_sb700 && pdev->revision >= 0x40) | 2789 | if (board_id == board_ahci_sb700 && pdev->revision >= 0x40) |
2745 | hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL; | 2790 | hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL; |
2746 | 2791 | ||
2792 | /* apply ASUS M2A-VM quirk */ | |
2793 | if (ahci_asus_m2a_vm_32bit_only(pdev)) | ||
2794 | hpriv->flags |= AHCI_HFLAG_32BIT_ONLY; | ||
2795 | |||
2747 | if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) | 2796 | if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) |
2748 | pci_enable_msi(pdev); | 2797 | pci_enable_msi(pdev); |
2749 | 2798 | ||
diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index 1aeb7082b0c4..d0a14cf2bd74 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c | |||
@@ -223,10 +223,8 @@ static const struct pci_device_id piix_pci_tbl[] = { | |||
223 | /* ICH8 Mobile PATA Controller */ | 223 | /* ICH8 Mobile PATA Controller */ |
224 | { 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, | 224 | { 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, |
225 | 225 | ||
226 | /* NOTE: The following PCI ids must be kept in sync with the | 226 | /* SATA ports */ |
227 | * list in drivers/pci/quirks.c. | 227 | |
228 | */ | ||
229 | |||
230 | /* 82801EB (ICH5) */ | 228 | /* 82801EB (ICH5) */ |
231 | { 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata }, | 229 | { 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata }, |
232 | /* 82801EB (ICH5) */ | 230 | /* 82801EB (ICH5) */ |
@@ -1509,8 +1507,8 @@ static int __devinit piix_init_one(struct pci_dev *pdev, | |||
1509 | dev_printk(KERN_DEBUG, &pdev->dev, | 1507 | dev_printk(KERN_DEBUG, &pdev->dev, |
1510 | "version " DRV_VERSION "\n"); | 1508 | "version " DRV_VERSION "\n"); |
1511 | 1509 | ||
1512 | /* no hotplugging support (FIXME) */ | 1510 | /* no hotplugging support for later devices (FIXME) */ |
1513 | if (!in_module_init) | 1511 | if (!in_module_init && ent->driver_data >= ich5_sata) |
1514 | return -ENODEV; | 1512 | return -ENODEV; |
1515 | 1513 | ||
1516 | if (piix_broken_system_poweroff(pdev)) { | 1514 | if (piix_broken_system_poweroff(pdev)) { |
@@ -1591,6 +1589,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev, | |||
1591 | host->ports[1]->mwdma_mask = 0; | 1589 | host->ports[1]->mwdma_mask = 0; |
1592 | host->ports[1]->udma_mask = 0; | 1590 | host->ports[1]->udma_mask = 0; |
1593 | } | 1591 | } |
1592 | host->flags |= ATA_HOST_PARALLEL_SCAN; | ||
1594 | 1593 | ||
1595 | pci_set_master(pdev); | 1594 | pci_set_master(pdev); |
1596 | return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht); | 1595 | return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht); |
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c9242301cfa1..ca4d208ddf3b 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c | |||
@@ -5031,7 +5031,6 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active) | |||
5031 | { | 5031 | { |
5032 | int nr_done = 0; | 5032 | int nr_done = 0; |
5033 | u32 done_mask; | 5033 | u32 done_mask; |
5034 | int i; | ||
5035 | 5034 | ||
5036 | done_mask = ap->qc_active ^ qc_active; | 5035 | done_mask = ap->qc_active ^ qc_active; |
5037 | 5036 | ||
@@ -5041,16 +5040,16 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active) | |||
5041 | return -EINVAL; | 5040 | return -EINVAL; |
5042 | } | 5041 | } |
5043 | 5042 | ||
5044 | for (i = 0; i < ATA_MAX_QUEUE; i++) { | 5043 | while (done_mask) { |
5045 | struct ata_queued_cmd *qc; | 5044 | struct ata_queued_cmd *qc; |
5045 | unsigned int tag = __ffs(done_mask); | ||
5046 | 5046 | ||
5047 | if (!(done_mask & (1 << i))) | 5047 | qc = ata_qc_from_tag(ap, tag); |
5048 | continue; | 5048 | if (qc) { |
5049 | |||
5050 | if ((qc = ata_qc_from_tag(ap, i))) { | ||
5051 | ata_qc_complete(qc); | 5049 | ata_qc_complete(qc); |
5052 | nr_done++; | 5050 | nr_done++; |
5053 | } | 5051 | } |
5052 | done_mask &= ~(1 << tag); | ||
5054 | } | 5053 | } |
5055 | 5054 | ||
5056 | return nr_done; | 5055 | return nr_done; |
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index bb18415d3d63..bbbb1fab1755 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c | |||
@@ -727,17 +727,23 @@ unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf, | |||
727 | else | 727 | else |
728 | iowrite16_rep(data_addr, buf, words); | 728 | iowrite16_rep(data_addr, buf, words); |
729 | 729 | ||
730 | /* Transfer trailing 1 byte, if any. */ | 730 | /* Transfer trailing byte, if any. */ |
731 | if (unlikely(buflen & 0x01)) { | 731 | if (unlikely(buflen & 0x01)) { |
732 | __le16 align_buf[1] = { 0 }; | 732 | unsigned char pad[2]; |
733 | unsigned char *trailing_buf = buf + buflen - 1; | ||
734 | 733 | ||
734 | /* Point buf to the tail of buffer */ | ||
735 | buf += buflen - 1; | ||
736 | |||
737 | /* | ||
738 | * Use io*16_rep() accessors here as well to avoid pointlessly | ||
739 | * swapping bytes to and fro on the big endian machines... | ||
740 | */ | ||
735 | if (rw == READ) { | 741 | if (rw == READ) { |
736 | align_buf[0] = cpu_to_le16(ioread16(data_addr)); | 742 | ioread16_rep(data_addr, pad, 1); |
737 | memcpy(trailing_buf, align_buf, 1); | 743 | *buf = pad[0]; |
738 | } else { | 744 | } else { |
739 | memcpy(align_buf, trailing_buf, 1); | 745 | pad[0] = *buf; |
740 | iowrite16(le16_to_cpu(align_buf[0]), data_addr); | 746 | iowrite16_rep(data_addr, pad, 1); |
741 | } | 747 | } |
742 | words++; | 748 | words++; |
743 | } | 749 | } |
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 6cda12ba8122..b2d11f300c39 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c | |||
@@ -305,8 +305,8 @@ static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance); | |||
305 | static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); | 305 | static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); |
306 | static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); | 306 | static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); |
307 | 307 | ||
308 | static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class, | 308 | static int nv_hardreset(struct ata_link *link, unsigned int *class, |
309 | unsigned long deadline); | 309 | unsigned long deadline); |
310 | static void nv_nf2_freeze(struct ata_port *ap); | 310 | static void nv_nf2_freeze(struct ata_port *ap); |
311 | static void nv_nf2_thaw(struct ata_port *ap); | 311 | static void nv_nf2_thaw(struct ata_port *ap); |
312 | static void nv_ck804_freeze(struct ata_port *ap); | 312 | static void nv_ck804_freeze(struct ata_port *ap); |
@@ -406,49 +406,82 @@ static struct scsi_host_template nv_swncq_sht = { | |||
406 | .slave_configure = nv_swncq_slave_config, | 406 | .slave_configure = nv_swncq_slave_config, |
407 | }; | 407 | }; |
408 | 408 | ||
409 | static struct ata_port_operations nv_common_ops = { | 409 | /* |
410 | * NV SATA controllers have various different problems with hardreset | ||
411 | * protocol depending on the specific controller and device. | ||
412 | * | ||
413 | * GENERIC: | ||
414 | * | ||
415 | * bko11195 reports that link doesn't come online after hardreset on | ||
416 | * generic nv's and there have been several other similar reports on | ||
417 | * linux-ide. | ||
418 | * | ||
419 | * bko12351#c23 reports that warmplug on MCP61 doesn't work with | ||
420 | * softreset. | ||
421 | * | ||
422 | * NF2/3: | ||
423 | * | ||
424 | * bko3352 reports nf2/3 controllers can't determine device signature | ||
425 | * reliably after hardreset. The following thread reports detection | ||
426 | * failure on cold boot with the standard debouncing timing. | ||
427 | * | ||
428 | * http://thread.gmane.org/gmane.linux.ide/34098 | ||
429 | * | ||
430 | * bko12176 reports that hardreset fails to bring up the link during | ||
431 | * boot on nf2. | ||
432 | * | ||
433 | * CK804: | ||
434 | * | ||
435 | * For initial probing after boot and hot plugging, hardreset mostly | ||
436 | * works fine on CK804 but curiously, reprobing on the initial port | ||
437 | * by rescanning or rmmod/insmod fails to acquire the initial D2H Reg | ||
438 | * FIS in a somewhat nondeterministic way. | |
439 | * | ||
440 | * SWNCQ: | ||
441 | * | ||
442 | * bko12351 reports that when SWNCQ is enabled, for hotplug to work, | ||
443 | * hardreset should be used and hardreset can't report proper | ||
444 | * signature, which suggests that mcp5x is closer to nf2 as far as | |
445 | * reset quirkiness is concerned. | ||
446 | * | ||
447 | * bko12703 reports that boot probing fails for intel SSD with | ||
448 | * hardreset. Link fails to come online. Softreset works fine. | ||
449 | * | ||
450 | * The failures are varied but the following patterns seem true for | ||
451 | * all flavors. | ||
452 | * | ||
453 | * - Softreset during boot always works. | ||
454 | * | ||
455 | * - Hardreset during boot sometimes fails to bring up the link on | ||
456 | * certain combinations and device signature acquisition is | |
457 | * unreliable. | ||
458 | * | ||
459 | * - Hardreset is often necessary after hotplug. | ||
460 | * | ||
461 | * So, preferring softreset for boot probing and error handling (as | ||
462 | * hardreset might bring down the link) but using hardreset for | ||
463 | * post-boot probing should work around the above issues in most | ||
464 | * cases. Define nv_hardreset() which only kicks in for post-boot | ||
465 | * probing and use it for all variants. | ||
466 | */ | ||
467 | static struct ata_port_operations nv_generic_ops = { | ||
410 | .inherits = &ata_bmdma_port_ops, | 468 | .inherits = &ata_bmdma_port_ops, |
411 | .lost_interrupt = ATA_OP_NULL, | 469 | .lost_interrupt = ATA_OP_NULL, |
412 | .scr_read = nv_scr_read, | 470 | .scr_read = nv_scr_read, |
413 | .scr_write = nv_scr_write, | 471 | .scr_write = nv_scr_write, |
472 | .hardreset = nv_hardreset, | ||
414 | }; | 473 | }; |
415 | 474 | ||
416 | /* OSDL bz11195 reports that link doesn't come online after hardreset | ||
417 | * on generic nv's and there have been several other similar reports | ||
418 | * on linux-ide. Disable hardreset for generic nv's. | ||
419 | */ | ||
420 | static struct ata_port_operations nv_generic_ops = { | ||
421 | .inherits = &nv_common_ops, | ||
422 | .hardreset = ATA_OP_NULL, | ||
423 | }; | ||
424 | |||
425 | /* nf2 is ripe with hardreset related problems. | ||
426 | * | ||
427 | * kernel bz#3352 reports nf2/3 controllers can't determine device | ||
428 | * signature reliably. The following thread reports detection failure | ||
429 | * on cold boot with the standard debouncing timing. | ||
430 | * | ||
431 | * http://thread.gmane.org/gmane.linux.ide/34098 | ||
432 | * | ||
433 | * And bz#12176 reports that hardreset simply doesn't work on nf2. | ||
434 | * Give up on it and just don't do hardreset. | ||
435 | */ | ||
436 | static struct ata_port_operations nv_nf2_ops = { | 475 | static struct ata_port_operations nv_nf2_ops = { |
437 | .inherits = &nv_generic_ops, | 476 | .inherits = &nv_generic_ops, |
438 | .freeze = nv_nf2_freeze, | 477 | .freeze = nv_nf2_freeze, |
439 | .thaw = nv_nf2_thaw, | 478 | .thaw = nv_nf2_thaw, |
440 | }; | 479 | }; |
441 | 480 | ||
442 | /* For initial probing after boot and hot plugging, hardreset mostly | ||
443 | * works fine on CK804 but curiously, reprobing on the initial port by | ||
444 | * rescanning or rmmod/insmod fails to acquire the initial D2H Reg FIS | ||
445 | * in somewhat undeterministic way. Use noclassify hardreset. | ||
446 | */ | ||
447 | static struct ata_port_operations nv_ck804_ops = { | 481 | static struct ata_port_operations nv_ck804_ops = { |
448 | .inherits = &nv_common_ops, | 482 | .inherits = &nv_generic_ops, |
449 | .freeze = nv_ck804_freeze, | 483 | .freeze = nv_ck804_freeze, |
450 | .thaw = nv_ck804_thaw, | 484 | .thaw = nv_ck804_thaw, |
451 | .hardreset = nv_noclassify_hardreset, | ||
452 | .host_stop = nv_ck804_host_stop, | 485 | .host_stop = nv_ck804_host_stop, |
453 | }; | 486 | }; |
454 | 487 | ||
@@ -476,19 +509,8 @@ static struct ata_port_operations nv_adma_ops = { | |||
476 | .host_stop = nv_adma_host_stop, | 509 | .host_stop = nv_adma_host_stop, |
477 | }; | 510 | }; |
478 | 511 | ||
479 | /* Kernel bz#12351 reports that when SWNCQ is enabled, for hotplug to | ||
480 | * work, hardreset should be used and hardreset can't report proper | ||
481 | * signature, which suggests that mcp5x is closer to nf2 as long as | ||
482 | * reset quirkiness is concerned. Define separate ops for mcp5x with | ||
483 | * nv_noclassify_hardreset(). | ||
484 | */ | ||
485 | static struct ata_port_operations nv_mcp5x_ops = { | ||
486 | .inherits = &nv_common_ops, | ||
487 | .hardreset = nv_noclassify_hardreset, | ||
488 | }; | ||
489 | |||
490 | static struct ata_port_operations nv_swncq_ops = { | 512 | static struct ata_port_operations nv_swncq_ops = { |
491 | .inherits = &nv_mcp5x_ops, | 513 | .inherits = &nv_generic_ops, |
492 | 514 | ||
493 | .qc_defer = ata_std_qc_defer, | 515 | .qc_defer = ata_std_qc_defer, |
494 | .qc_prep = nv_swncq_qc_prep, | 516 | .qc_prep = nv_swncq_qc_prep, |
@@ -557,7 +579,7 @@ static const struct ata_port_info nv_port_info[] = { | |||
557 | .pio_mask = NV_PIO_MASK, | 579 | .pio_mask = NV_PIO_MASK, |
558 | .mwdma_mask = NV_MWDMA_MASK, | 580 | .mwdma_mask = NV_MWDMA_MASK, |
559 | .udma_mask = NV_UDMA_MASK, | 581 | .udma_mask = NV_UDMA_MASK, |
560 | .port_ops = &nv_mcp5x_ops, | 582 | .port_ops = &nv_generic_ops, |
561 | .private_data = NV_PI_PRIV(nv_generic_interrupt, &nv_sht), | 583 | .private_data = NV_PI_PRIV(nv_generic_interrupt, &nv_sht), |
562 | }, | 584 | }, |
563 | /* SWNCQ */ | 585 | /* SWNCQ */ |
@@ -1559,15 +1581,24 @@ static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) | |||
1559 | return 0; | 1581 | return 0; |
1560 | } | 1582 | } |
1561 | 1583 | ||
1562 | static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class, | 1584 | static int nv_hardreset(struct ata_link *link, unsigned int *class, |
1563 | unsigned long deadline) | 1585 | unsigned long deadline) |
1564 | { | 1586 | { |
1565 | bool online; | 1587 | struct ata_eh_context *ehc = &link->eh_context; |
1566 | int rc; | ||
1567 | 1588 | ||
1568 | rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, | 1589 | /* Do hardreset iff it's post-boot probing, please read the |
1569 | &online, NULL); | 1590 | * comment above port ops for details. |
1570 | return online ? -EAGAIN : rc; | 1591 | */ |
1592 | if (!(link->ap->pflags & ATA_PFLAG_LOADING) && | ||
1593 | !ata_dev_enabled(link->device)) | ||
1594 | sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, | ||
1595 | NULL, NULL); | ||
1596 | else if (!(ehc->i.flags & ATA_EHI_QUIET)) | ||
1597 | ata_link_printk(link, KERN_INFO, | ||
1598 | "nv: skipping hardreset on occupied port\n"); | ||
1599 | |||
1600 | /* device signature acquisition is unreliable */ | ||
1601 | return -EAGAIN; | ||
1571 | } | 1602 | } |
1572 | 1603 | ||
1573 | static void nv_nf2_freeze(struct ata_port *ap) | 1604 | static void nv_nf2_freeze(struct ata_port *ap) |
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index e67ce8e5caa5..030ec079b184 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c | |||
@@ -183,7 +183,7 @@ static struct scsi_host_template sil_sht = { | |||
183 | }; | 183 | }; |
184 | 184 | ||
185 | static struct ata_port_operations sil_ops = { | 185 | static struct ata_port_operations sil_ops = { |
186 | .inherits = &ata_bmdma_port_ops, | 186 | .inherits = &ata_bmdma32_port_ops, |
187 | .dev_config = sil_dev_config, | 187 | .dev_config = sil_dev_config, |
188 | .set_mode = sil_set_mode, | 188 | .set_mode = sil_set_mode, |
189 | .bmdma_setup = sil_bmdma_setup, | 189 | .bmdma_setup = sil_bmdma_setup, |
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c index eb05a3c82a9e..bbcf970068ad 100644 --- a/drivers/ata/sata_sx4.c +++ b/drivers/ata/sata_sx4.c | |||
@@ -193,6 +193,7 @@ enum { | |||
193 | PDC_TIMER_MASK_INT, | 193 | PDC_TIMER_MASK_INT, |
194 | }; | 194 | }; |
195 | 195 | ||
196 | #define ECC_ERASE_BUF_SZ (128 * 1024) | ||
196 | 197 | ||
197 | struct pdc_port_priv { | 198 | struct pdc_port_priv { |
198 | u8 dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512]; | 199 | u8 dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512]; |
@@ -1280,7 +1281,6 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) | |||
1280 | { | 1281 | { |
1281 | int speed, size, length; | 1282 | int speed, size, length; |
1282 | u32 addr, spd0, pci_status; | 1283 | u32 addr, spd0, pci_status; |
1283 | u32 tmp = 0; | ||
1284 | u32 time_period = 0; | 1284 | u32 time_period = 0; |
1285 | u32 tcount = 0; | 1285 | u32 tcount = 0; |
1286 | u32 ticks = 0; | 1286 | u32 ticks = 0; |
@@ -1395,14 +1395,17 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) | |||
1395 | pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, | 1395 | pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, |
1396 | PDC_DIMM_SPD_TYPE, &spd0); | 1396 | PDC_DIMM_SPD_TYPE, &spd0); |
1397 | if (spd0 == 0x02) { | 1397 | if (spd0 == 0x02) { |
1398 | void *buf; | ||
1398 | VPRINTK("Start ECC initialization\n"); | 1399 | VPRINTK("Start ECC initialization\n"); |
1399 | addr = 0; | 1400 | addr = 0; |
1400 | length = size * 1024 * 1024; | 1401 | length = size * 1024 * 1024; |
1402 | buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL); | ||
1401 | while (addr < length) { | 1403 | while (addr < length) { |
1402 | pdc20621_put_to_dimm(host, (void *) &tmp, addr, | 1404 | pdc20621_put_to_dimm(host, buf, addr, |
1403 | sizeof(u32)); | 1405 | ECC_ERASE_BUF_SZ); |
1404 | addr += sizeof(u32); | 1406 | addr += ECC_ERASE_BUF_SZ; |
1405 | } | 1407 | } |
1408 | kfree(buf); | ||
1406 | VPRINTK("Finish ECC initialization\n"); | 1409 | VPRINTK("Finish ECC initialization\n"); |
1407 | } | 1410 | } |
1408 | return 0; | 1411 | return 0; |
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index d6a807f4077d..39a05b5fa9cb 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/kbd_kern.h> | 25 | #include <linux/kbd_kern.h> |
26 | #include <linux/proc_fs.h> | 26 | #include <linux/proc_fs.h> |
27 | #include <linux/quotaops.h> | 27 | #include <linux/quotaops.h> |
28 | #include <linux/perf_counter.h> | ||
28 | #include <linux/kernel.h> | 29 | #include <linux/kernel.h> |
29 | #include <linux/module.h> | 30 | #include <linux/module.h> |
30 | #include <linux/suspend.h> | 31 | #include <linux/suspend.h> |
@@ -243,6 +244,7 @@ static void sysrq_handle_showregs(int key, struct tty_struct *tty) | |||
243 | struct pt_regs *regs = get_irq_regs(); | 244 | struct pt_regs *regs = get_irq_regs(); |
244 | if (regs) | 245 | if (regs) |
245 | show_regs(regs); | 246 | show_regs(regs); |
247 | perf_counter_print_debug(); | ||
246 | } | 248 | } |
247 | static struct sysrq_key_op sysrq_showregs_op = { | 249 | static struct sysrq_key_op sysrq_showregs_op = { |
248 | .handler = sysrq_handle_showregs, | 250 | .handler = sysrq_handle_showregs, |
diff --git a/drivers/char/vt.c b/drivers/char/vt.c index 08151d4de489..de9ebee8657b 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c | |||
@@ -95,7 +95,6 @@ | |||
95 | #include <linux/timer.h> | 95 | #include <linux/timer.h> |
96 | #include <linux/interrupt.h> | 96 | #include <linux/interrupt.h> |
97 | #include <linux/workqueue.h> | 97 | #include <linux/workqueue.h> |
98 | #include <linux/bootmem.h> | ||
99 | #include <linux/pm.h> | 98 | #include <linux/pm.h> |
100 | #include <linux/font.h> | 99 | #include <linux/font.h> |
101 | #include <linux/bitops.h> | 100 | #include <linux/bitops.h> |
@@ -104,6 +103,7 @@ | |||
104 | #include <linux/io.h> | 103 | #include <linux/io.h> |
105 | #include <asm/system.h> | 104 | #include <asm/system.h> |
106 | #include <linux/uaccess.h> | 105 | #include <linux/uaccess.h> |
106 | #include <linux/kmemleak.h> | ||
107 | 107 | ||
108 | #define MAX_NR_CON_DRIVER 16 | 108 | #define MAX_NR_CON_DRIVER 16 |
109 | 109 | ||
@@ -2875,14 +2875,11 @@ static int __init con_init(void) | |||
2875 | mod_timer(&console_timer, jiffies + blankinterval); | 2875 | mod_timer(&console_timer, jiffies + blankinterval); |
2876 | } | 2876 | } |
2877 | 2877 | ||
2878 | /* | ||
2879 | * kmalloc is not running yet - we use the bootmem allocator. | ||
2880 | */ | ||
2881 | for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { | 2878 | for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { |
2882 | vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data)); | 2879 | vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); |
2883 | INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); | 2880 | INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); |
2884 | visual_init(vc, currcons, 1); | 2881 | visual_init(vc, currcons, 1); |
2885 | vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size); | 2882 | vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); |
2886 | vc->vc_kmalloced = 0; | 2883 | vc->vc_kmalloced = 0; |
2887 | vc_init(vc, vc->vc_rows, vc->vc_cols, | 2884 | vc_init(vc, vc->vc_rows, vc->vc_cols, |
2888 | currcons || !vc->vc_sw->con_save_screen); | 2885 | currcons || !vc->vc_sw->con_save_screen); |
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 5f1b5400d96a..24c84ae81527 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c | |||
@@ -596,6 +596,7 @@ int dmi_get_year(int field) | |||
596 | 596 | ||
597 | return year; | 597 | return year; |
598 | } | 598 | } |
599 | EXPORT_SYMBOL(dmi_get_year); | ||
599 | 600 | ||
600 | /** | 601 | /** |
601 | * dmi_walk - Walk the DMI table and get called back for every record | 602 | * dmi_walk - Walk the DMI table and get called back for every record |
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 38e86b84dce0..59d7d5ec17a4 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c | |||
@@ -180,7 +180,7 @@ static inline void vga_set_mem_top(struct vc_data *c) | |||
180 | } | 180 | } |
181 | 181 | ||
182 | #ifdef CONFIG_VGACON_SOFT_SCROLLBACK | 182 | #ifdef CONFIG_VGACON_SOFT_SCROLLBACK |
183 | #include <linux/bootmem.h> | 183 | #include <linux/slab.h> |
184 | /* software scrollback */ | 184 | /* software scrollback */ |
185 | static void *vgacon_scrollback; | 185 | static void *vgacon_scrollback; |
186 | static int vgacon_scrollback_tail; | 186 | static int vgacon_scrollback_tail; |
@@ -210,8 +210,7 @@ static void vgacon_scrollback_init(int pitch) | |||
210 | */ | 210 | */ |
211 | static void __init_refok vgacon_scrollback_startup(void) | 211 | static void __init_refok vgacon_scrollback_startup(void) |
212 | { | 212 | { |
213 | vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE | 213 | vgacon_scrollback = kcalloc(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, GFP_NOWAIT); |
214 | * 1024); | ||
215 | vgacon_scrollback_init(vga_video_num_columns * 2); | 214 | vgacon_scrollback_init(vga_video_num_columns * 2); |
216 | } | 215 | } |
217 | 216 | ||
diff --git a/fs/block_dev.c b/fs/block_dev.c index 2dfc6cdcebbe..931f6b8c4b2f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/uio.h> | 25 | #include <linux/uio.h> |
26 | #include <linux/namei.h> | 26 | #include <linux/namei.h> |
27 | #include <linux/log2.h> | 27 | #include <linux/log2.h> |
28 | #include <linux/kmemleak.h> | ||
28 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
29 | #include "internal.h" | 30 | #include "internal.h" |
30 | 31 | ||
@@ -492,6 +493,11 @@ void __init bdev_cache_init(void) | |||
492 | bd_mnt = kern_mount(&bd_type); | 493 | bd_mnt = kern_mount(&bd_type); |
493 | if (IS_ERR(bd_mnt)) | 494 | if (IS_ERR(bd_mnt)) |
494 | panic("Cannot create bdev pseudo-fs"); | 495 | panic("Cannot create bdev pseudo-fs"); |
496 | /* | ||
497 | * This vfsmount structure is only used to obtain the | ||
498 | * blockdev_superblock, so tell kmemleak not to report it. | ||
499 | */ | ||
500 | kmemleak_not_leak(bd_mnt); | ||
495 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ | 501 | blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ |
496 | } | 502 | } |
497 | 503 | ||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/string.h> | 33 | #include <linux/string.h> |
34 | #include <linux/init.h> | 34 | #include <linux/init.h> |
35 | #include <linux/pagemap.h> | 35 | #include <linux/pagemap.h> |
36 | #include <linux/perf_counter.h> | ||
36 | #include <linux/highmem.h> | 37 | #include <linux/highmem.h> |
37 | #include <linux/spinlock.h> | 38 | #include <linux/spinlock.h> |
38 | #include <linux/key.h> | 39 | #include <linux/key.h> |
@@ -922,6 +923,7 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
922 | task_lock(tsk); | 923 | task_lock(tsk); |
923 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); | 924 | strlcpy(tsk->comm, buf, sizeof(tsk->comm)); |
924 | task_unlock(tsk); | 925 | task_unlock(tsk); |
926 | perf_counter_comm(tsk); | ||
925 | } | 927 | } |
926 | 928 | ||
927 | int flush_old_exec(struct linux_binprm * bprm) | 929 | int flush_old_exec(struct linux_binprm * bprm) |
@@ -990,6 +992,13 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
990 | 992 | ||
991 | current->personality &= ~bprm->per_clear; | 993 | current->personality &= ~bprm->per_clear; |
992 | 994 | ||
995 | /* | ||
996 | * Flush performance counters when crossing a | ||
997 | * security domain: | ||
998 | */ | ||
999 | if (!get_dumpable(current->mm)) | ||
1000 | perf_counter_exit_task(current); | ||
1001 | |||
993 | /* An exec changes our domain. We are no longer part of the thread | 1002 | /* An exec changes our domain. We are no longer part of the thread |
994 | group */ | 1003 | group */ |
995 | 1004 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index 346057218edc..0fc30407f039 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -2571,6 +2571,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) | |||
2571 | 2571 | ||
2572 | txAbort(tid, 0); | 2572 | txAbort(tid, 0); |
2573 | txEnd(tid); | 2573 | txEnd(tid); |
2574 | mutex_unlock(&JFS_IP(ipimap)->commit_mutex); | ||
2574 | 2575 | ||
2575 | /* release the inode map lock */ | 2576 | /* release the inode map lock */ |
2576 | IWRITE_UNLOCK(ipimap); | 2577 | IWRITE_UNLOCK(ipimap); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 6f21adf9479a..d9b0e92b3602 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -720,8 +720,10 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, | |||
720 | blk++; | 720 | blk++; |
721 | } | 721 | } |
722 | out: | 722 | out: |
723 | if (len == towrite) | 723 | if (len == towrite) { |
724 | mutex_unlock(&inode->i_mutex); | ||
724 | return err; | 725 | return err; |
726 | } | ||
725 | if (inode->i_size < off+len-towrite) | 727 | if (inode->i_size < off+len-towrite) |
726 | i_size_write(inode, off+len-towrite); | 728 | i_size_write(inode, off+len-towrite); |
727 | inode->i_version++; | 729 | inode->i_version++; |
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 3673a13b6703..81d3be459efb 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h | |||
@@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) | |||
134 | #define atomic_long_cmpxchg(l, old, new) \ | 134 | #define atomic_long_cmpxchg(l, old, new) \ |
135 | (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) | 135 | (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) |
136 | #define atomic_long_xchg(v, new) \ | 136 | #define atomic_long_xchg(v, new) \ |
137 | (atomic64_xchg((atomic64_t *)(l), (new))) | 137 | (atomic64_xchg((atomic64_t *)(v), (new))) |
138 | 138 | ||
139 | #else /* BITS_PER_LONG == 64 */ | 139 | #else /* BITS_PER_LONG == 64 */ |
140 | 140 | ||
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6646bfc7b892..28b1f30601b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -108,6 +108,15 @@ extern struct group_info init_groups; | |||
108 | 108 | ||
109 | extern struct cred init_cred; | 109 | extern struct cred init_cred; |
110 | 110 | ||
111 | #ifdef CONFIG_PERF_COUNTERS | ||
112 | # define INIT_PERF_COUNTERS(tsk) \ | ||
113 | .perf_counter_mutex = \ | ||
114 | __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ | ||
115 | .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), | ||
116 | #else | ||
117 | # define INIT_PERF_COUNTERS(tsk) | ||
118 | #endif | ||
119 | |||
111 | /* | 120 | /* |
112 | * INIT_TASK is used to set up the first task table, touch at | 121 | * INIT_TASK is used to set up the first task table, touch at |
113 | * your own risk!. Base=0, limit=0x1fffff (=2MB) | 122 | * your own risk!. Base=0, limit=0x1fffff (=2MB) |
@@ -171,6 +180,7 @@ extern struct cred init_cred; | |||
171 | }, \ | 180 | }, \ |
172 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ | 181 | .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ |
173 | INIT_IDS \ | 182 | INIT_IDS \ |
183 | INIT_PERF_COUNTERS(tsk) \ | ||
174 | INIT_TRACE_IRQFLAGS \ | 184 | INIT_TRACE_IRQFLAGS \ |
175 | INIT_LOCKDEP \ | 185 | INIT_LOCKDEP \ |
176 | INIT_FTRACE_GRAPH \ | 186 | INIT_FTRACE_GRAPH \ |
diff --git a/include/linux/irq.h b/include/linux/irq.h index eedbb8e5e0cc..1e50c34f0062 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h | |||
@@ -430,23 +430,19 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); | |||
430 | * Returns true if successful (or not required). | 430 | * Returns true if successful (or not required). |
431 | */ | 431 | */ |
432 | static inline bool alloc_desc_masks(struct irq_desc *desc, int node, | 432 | static inline bool alloc_desc_masks(struct irq_desc *desc, int node, |
433 | bool boot) | 433 | bool boot) |
434 | { | 434 | { |
435 | #ifdef CONFIG_CPUMASK_OFFSTACK | 435 | gfp_t gfp = GFP_ATOMIC; |
436 | if (boot) { | ||
437 | alloc_bootmem_cpumask_var(&desc->affinity); | ||
438 | 436 | ||
439 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 437 | if (boot) |
440 | alloc_bootmem_cpumask_var(&desc->pending_mask); | 438 | gfp = GFP_NOWAIT; |
441 | #endif | ||
442 | return true; | ||
443 | } | ||
444 | 439 | ||
445 | if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) | 440 | #ifdef CONFIG_CPUMASK_OFFSTACK |
441 | if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) | ||
446 | return false; | 442 | return false; |
447 | 443 | ||
448 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 444 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
449 | if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { | 445 | if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { |
450 | free_cpumask_var(desc->affinity); | 446 | free_cpumask_var(desc->affinity); |
451 | return false; | 447 | return false; |
452 | } | 448 | } |
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 0c8b89f28a95..a77c6007dc99 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h | |||
@@ -81,7 +81,12 @@ static inline unsigned int kstat_irqs(unsigned int irq) | |||
81 | return sum; | 81 | return sum; |
82 | } | 82 | } |
83 | 83 | ||
84 | |||
85 | /* | ||
86 | * Lock/unlock the current runqueue - to extract task statistics: | ||
87 | */ | ||
84 | extern unsigned long long task_delta_exec(struct task_struct *); | 88 | extern unsigned long long task_delta_exec(struct task_struct *); |
89 | |||
85 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); | 90 | extern void account_user_time(struct task_struct *, cputime_t, cputime_t); |
86 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); | 91 | extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); |
87 | extern void account_steal_time(cputime_t); | 92 | extern void account_steal_time(cputime_t); |
diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h new file mode 100644 index 000000000000..7796aed6cdd5 --- /dev/null +++ b/include/linux/kmemleak.h | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * include/linux/kmemleak.h | ||
3 | * | ||
4 | * Copyright (C) 2008 ARM Limited | ||
5 | * Written by Catalin Marinas <catalin.marinas@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | |||
21 | #ifndef __KMEMLEAK_H | ||
22 | #define __KMEMLEAK_H | ||
23 | |||
24 | #ifdef CONFIG_DEBUG_KMEMLEAK | ||
25 | |||
26 | extern void kmemleak_init(void); | ||
27 | extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, | ||
28 | gfp_t gfp); | ||
29 | extern void kmemleak_free(const void *ptr); | ||
30 | extern void kmemleak_padding(const void *ptr, unsigned long offset, | ||
31 | size_t size); | ||
32 | extern void kmemleak_not_leak(const void *ptr); | ||
33 | extern void kmemleak_ignore(const void *ptr); | ||
34 | extern void kmemleak_scan_area(const void *ptr, unsigned long offset, | ||
35 | size_t length, gfp_t gfp); | ||
36 | extern void kmemleak_no_scan(const void *ptr); | ||
37 | |||
38 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, | ||
39 | int min_count, unsigned long flags, | ||
40 | gfp_t gfp) | ||
41 | { | ||
42 | if (!(flags & SLAB_NOLEAKTRACE)) | ||
43 | kmemleak_alloc(ptr, size, min_count, gfp); | ||
44 | } | ||
45 | |||
46 | static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) | ||
47 | { | ||
48 | if (!(flags & SLAB_NOLEAKTRACE)) | ||
49 | kmemleak_free(ptr); | ||
50 | } | ||
51 | |||
52 | static inline void kmemleak_erase(void **ptr) | ||
53 | { | ||
54 | *ptr = NULL; | ||
55 | } | ||
56 | |||
57 | #else | ||
58 | |||
59 | static inline void kmemleak_init(void) | ||
60 | { | ||
61 | } | ||
62 | static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, | ||
63 | gfp_t gfp) | ||
64 | { | ||
65 | } | ||
66 | static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, | ||
67 | int min_count, unsigned long flags, | ||
68 | gfp_t gfp) | ||
69 | { | ||
70 | } | ||
71 | static inline void kmemleak_free(const void *ptr) | ||
72 | { | ||
73 | } | ||
74 | static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) | ||
75 | { | ||
76 | } | ||
77 | static inline void kmemleak_not_leak(const void *ptr) | ||
78 | { | ||
79 | } | ||
80 | static inline void kmemleak_ignore(const void *ptr) | ||
81 | { | ||
82 | } | ||
83 | static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, | ||
84 | size_t length, gfp_t gfp) | ||
85 | { | ||
86 | } | ||
87 | static inline void kmemleak_erase(void **ptr) | ||
88 | { | ||
89 | } | ||
90 | static inline void kmemleak_no_scan(const void *ptr) | ||
91 | { | ||
92 | } | ||
93 | |||
94 | #endif /* CONFIG_DEBUG_KMEMLEAK */ | ||
95 | |||
96 | #endif /* __KMEMLEAK_H */ | ||
diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 1581ff235c7e..26fd9d12f050 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h | |||
@@ -86,7 +86,12 @@ struct percpu_data { | |||
86 | void *ptrs[1]; | 86 | void *ptrs[1]; |
87 | }; | 87 | }; |
88 | 88 | ||
89 | /* pointer disguising messes up the kmemleak objects tracking */ | ||
90 | #ifndef CONFIG_DEBUG_KMEMLEAK | ||
89 | #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) | 91 | #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) |
92 | #else | ||
93 | #define __percpu_disguise(pdata) (struct percpu_data *)(pdata) | ||
94 | #endif | ||
90 | 95 | ||
91 | #define per_cpu_ptr(ptr, cpu) \ | 96 | #define per_cpu_ptr(ptr, cpu) \ |
92 | ({ \ | 97 | ({ \ |
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h new file mode 100644 index 000000000000..6e133954e2e4 --- /dev/null +++ b/include/linux/perf_counter.h | |||
@@ -0,0 +1,697 @@ | |||
1 | /* | ||
2 | * Performance counters: | ||
3 | * | ||
4 | * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra | ||
7 | * | ||
8 | * Data type definitions, declarations, prototypes. | ||
9 | * | ||
10 | * Started by: Thomas Gleixner and Ingo Molnar | ||
11 | * | ||
12 | * For licencing details see kernel-base/COPYING | ||
13 | */ | ||
14 | #ifndef _LINUX_PERF_COUNTER_H | ||
15 | #define _LINUX_PERF_COUNTER_H | ||
16 | |||
17 | #include <linux/types.h> | ||
18 | #include <linux/ioctl.h> | ||
19 | #include <asm/byteorder.h> | ||
20 | |||
21 | /* | ||
22 | * User-space ABI bits: | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * attr.type | ||
27 | */ | ||
28 | enum perf_type_id { | ||
29 | PERF_TYPE_HARDWARE = 0, | ||
30 | PERF_TYPE_SOFTWARE = 1, | ||
31 | PERF_TYPE_TRACEPOINT = 2, | ||
32 | PERF_TYPE_HW_CACHE = 3, | ||
33 | PERF_TYPE_RAW = 4, | ||
34 | |||
35 | PERF_TYPE_MAX, /* non-ABI */ | ||
36 | }; | ||
37 | |||
38 | /* | ||
39 | * Generalized performance counter event types, used by the | ||
40 | * attr.event_id parameter of the sys_perf_counter_open() | ||
41 | * syscall: | ||
42 | */ | ||
43 | enum perf_hw_id { | ||
44 | /* | ||
45 | * Common hardware events, generalized by the kernel: | ||
46 | */ | ||
47 | PERF_COUNT_HW_CPU_CYCLES = 0, | ||
48 | PERF_COUNT_HW_INSTRUCTIONS = 1, | ||
49 | PERF_COUNT_HW_CACHE_REFERENCES = 2, | ||
50 | PERF_COUNT_HW_CACHE_MISSES = 3, | ||
51 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | ||
52 | PERF_COUNT_HW_BRANCH_MISSES = 5, | ||
53 | PERF_COUNT_HW_BUS_CYCLES = 6, | ||
54 | |||
55 | PERF_COUNT_HW_MAX, /* non-ABI */ | ||
56 | }; | ||
57 | |||
58 | /* | ||
59 | * Generalized hardware cache counters: | ||
60 | * | ||
61 | * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x | ||
62 | * { read, write, prefetch } x | ||
63 | * { accesses, misses } | ||
64 | */ | ||
65 | enum perf_hw_cache_id { | ||
66 | PERF_COUNT_HW_CACHE_L1D = 0, | ||
67 | PERF_COUNT_HW_CACHE_L1I = 1, | ||
68 | PERF_COUNT_HW_CACHE_LL = 2, | ||
69 | PERF_COUNT_HW_CACHE_DTLB = 3, | ||
70 | PERF_COUNT_HW_CACHE_ITLB = 4, | ||
71 | PERF_COUNT_HW_CACHE_BPU = 5, | ||
72 | |||
73 | PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ | ||
74 | }; | ||
75 | |||
76 | enum perf_hw_cache_op_id { | ||
77 | PERF_COUNT_HW_CACHE_OP_READ = 0, | ||
78 | PERF_COUNT_HW_CACHE_OP_WRITE = 1, | ||
79 | PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, | ||
80 | |||
81 | PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ | ||
82 | }; | ||
83 | |||
84 | enum perf_hw_cache_op_result_id { | ||
85 | PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, | ||
86 | PERF_COUNT_HW_CACHE_RESULT_MISS = 1, | ||
87 | |||
88 | PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ | ||
89 | }; | ||
90 | |||
91 | /* | ||
92 | * Special "software" counters provided by the kernel, even if the hardware | ||
93 | * does not support performance counters. These counters measure various | ||
94 | * physical and sw events of the kernel (and allow the profiling of them as | ||
95 | * well): | ||
96 | */ | ||
97 | enum perf_sw_ids { | ||
98 | PERF_COUNT_SW_CPU_CLOCK = 0, | ||
99 | PERF_COUNT_SW_TASK_CLOCK = 1, | ||
100 | PERF_COUNT_SW_PAGE_FAULTS = 2, | ||
101 | PERF_COUNT_SW_CONTEXT_SWITCHES = 3, | ||
102 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, | ||
103 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, | ||
104 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, | ||
105 | |||
106 | PERF_COUNT_SW_MAX, /* non-ABI */ | ||
107 | }; | ||
108 | |||
109 | /* | ||
110 | * Bits that can be set in attr.sample_type to request information | ||
111 | * in the overflow packets. | ||
112 | */ | ||
113 | enum perf_counter_sample_format { | ||
114 | PERF_SAMPLE_IP = 1U << 0, | ||
115 | PERF_SAMPLE_TID = 1U << 1, | ||
116 | PERF_SAMPLE_TIME = 1U << 2, | ||
117 | PERF_SAMPLE_ADDR = 1U << 3, | ||
118 | PERF_SAMPLE_GROUP = 1U << 4, | ||
119 | PERF_SAMPLE_CALLCHAIN = 1U << 5, | ||
120 | PERF_SAMPLE_ID = 1U << 6, | ||
121 | PERF_SAMPLE_CPU = 1U << 7, | ||
122 | PERF_SAMPLE_PERIOD = 1U << 8, | ||
123 | }; | ||
124 | |||
125 | /* | ||
126 | * Bits that can be set in attr.read_format to request that | ||
127 | * reads on the counter should return the indicated quantities, | ||
128 | * in increasing order of bit value, after the counter value. | ||
129 | */ | ||
130 | enum perf_counter_read_format { | ||
131 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, | ||
132 | PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, | ||
133 | PERF_FORMAT_ID = 1U << 2, | ||
134 | }; | ||
135 | |||
136 | /* | ||
137 | * Hardware event to monitor via a performance monitoring counter: | ||
138 | */ | ||
139 | struct perf_counter_attr { | ||
140 | /* | ||
141 | * Major type: hardware/software/tracepoint/etc. | ||
142 | */ | ||
143 | __u32 type; | ||
144 | __u32 __reserved_1; | ||
145 | |||
146 | /* | ||
147 | * Type specific configuration information. | ||
148 | */ | ||
149 | __u64 config; | ||
150 | |||
151 | union { | ||
152 | __u64 sample_period; | ||
153 | __u64 sample_freq; | ||
154 | }; | ||
155 | |||
156 | __u64 sample_type; | ||
157 | __u64 read_format; | ||
158 | |||
159 | __u64 disabled : 1, /* off by default */ | ||
160 | inherit : 1, /* children inherit it */ | ||
161 | pinned : 1, /* must always be on PMU */ | ||
162 | exclusive : 1, /* only group on PMU */ | ||
163 | exclude_user : 1, /* don't count user */ | ||
164 | exclude_kernel : 1, /* ditto kernel */ | ||
165 | exclude_hv : 1, /* ditto hypervisor */ | ||
166 | exclude_idle : 1, /* don't count when idle */ | ||
167 | mmap : 1, /* include mmap data */ | ||
168 | comm : 1, /* include comm data */ | ||
169 | freq : 1, /* use freq, not period */ | ||
170 | |||
171 | __reserved_2 : 53; | ||
172 | |||
173 | __u32 wakeup_events; /* wakeup every n events */ | ||
174 | __u32 __reserved_3; | ||
175 | |||
176 | __u64 __reserved_4; | ||
177 | }; | ||
178 | |||
179 | /* | ||
180 | * Ioctls that can be done on a perf counter fd: | ||
181 | */ | ||
182 | #define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) | ||
183 | #define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) | ||
184 | #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) | ||
185 | #define PERF_COUNTER_IOC_RESET _IO ('$', 3) | ||
186 | #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) | ||
187 | |||
188 | enum perf_counter_ioc_flags { | ||
189 | PERF_IOC_FLAG_GROUP = 1U << 0, | ||
190 | }; | ||
191 | |||
192 | /* | ||
193 | * Structure of the page that can be mapped via mmap | ||
194 | */ | ||
195 | struct perf_counter_mmap_page { | ||
196 | __u32 version; /* version number of this structure */ | ||
197 | __u32 compat_version; /* lowest version this is compat with */ | ||
198 | |||
199 | /* | ||
200 | * Bits needed to read the hw counters in user-space. | ||
201 | * | ||
202 | * u32 seq; | ||
203 | * s64 count; | ||
204 | * | ||
205 | * do { | ||
206 | * seq = pc->lock; | ||
207 | * | ||
208 | * barrier() | ||
209 | * if (pc->index) { | ||
210 | * count = pmc_read(pc->index - 1); | ||
211 | * count += pc->offset; | ||
212 | * } else | ||
213 | * goto regular_read; | ||
214 | * | ||
215 | * barrier(); | ||
216 | * } while (pc->lock != seq); | ||
217 | * | ||
218 | * NOTE: for obvious reason this only works on self-monitoring | ||
219 | * processes. | ||
220 | */ | ||
221 | __u32 lock; /* seqlock for synchronization */ | ||
222 | __u32 index; /* hardware counter identifier */ | ||
223 | __s64 offset; /* add to hardware counter value */ | ||
224 | |||
225 | /* | ||
226 | * Control data for the mmap() data buffer. | ||
227 | * | ||
228 | * User-space reading this value should issue an rmb(), on SMP capable | ||
229 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
230 | */ | ||
231 | __u64 data_head; /* head in the data section */ | ||
232 | }; | ||
233 | |||
234 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | ||
235 | #define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) | ||
236 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
237 | #define PERF_EVENT_MISC_USER (2 << 0) | ||
238 | #define PERF_EVENT_MISC_HYPERVISOR (3 << 0) | ||
239 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
240 | |||
241 | struct perf_event_header { | ||
242 | __u32 type; | ||
243 | __u16 misc; | ||
244 | __u16 size; | ||
245 | }; | ||
246 | |||
247 | enum perf_event_type { | ||
248 | |||
249 | /* | ||
250 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
251 | * correlate userspace IPs to code. They have the following structure: | ||
252 | * | ||
253 | * struct { | ||
254 | * struct perf_event_header header; | ||
255 | * | ||
256 | * u32 pid, tid; | ||
257 | * u64 addr; | ||
258 | * u64 len; | ||
259 | * u64 pgoff; | ||
260 | * char filename[]; | ||
261 | * }; | ||
262 | */ | ||
263 | PERF_EVENT_MMAP = 1, | ||
264 | |||
265 | /* | ||
266 | * struct { | ||
267 | * struct perf_event_header header; | ||
268 | * | ||
269 | * u32 pid, tid; | ||
270 | * char comm[]; | ||
271 | * }; | ||
272 | */ | ||
273 | PERF_EVENT_COMM = 3, | ||
274 | |||
275 | /* | ||
276 | * struct { | ||
277 | * struct perf_event_header header; | ||
278 | * u64 time; | ||
279 | * u64 id; | ||
280 | * u64 sample_period; | ||
281 | * }; | ||
282 | */ | ||
283 | PERF_EVENT_PERIOD = 4, | ||
284 | |||
285 | /* | ||
286 | * struct { | ||
287 | * struct perf_event_header header; | ||
288 | * u64 time; | ||
289 | * u64 id; | ||
290 | * }; | ||
291 | */ | ||
292 | PERF_EVENT_THROTTLE = 5, | ||
293 | PERF_EVENT_UNTHROTTLE = 6, | ||
294 | |||
295 | /* | ||
296 | * struct { | ||
297 | * struct perf_event_header header; | ||
298 | * u32 pid, ppid; | ||
299 | * }; | ||
300 | */ | ||
301 | PERF_EVENT_FORK = 7, | ||
302 | |||
303 | /* | ||
304 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | ||
305 | * will be PERF_RECORD_* | ||
306 | * | ||
307 | * struct { | ||
308 | * struct perf_event_header header; | ||
309 | * | ||
310 | * { u64 ip; } && PERF_RECORD_IP | ||
311 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
312 | * { u64 time; } && PERF_RECORD_TIME | ||
313 | * { u64 addr; } && PERF_RECORD_ADDR | ||
314 | * { u64 config; } && PERF_RECORD_CONFIG | ||
315 | * { u32 cpu, res; } && PERF_RECORD_CPU | ||
316 | * | ||
317 | * { u64 nr; | ||
318 | * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
319 | * | ||
320 | * { u16 nr, | ||
321 | * hv, | ||
322 | * kernel, | ||
323 | * user; | ||
324 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
325 | * }; | ||
326 | */ | ||
327 | }; | ||
328 | |||
329 | #ifdef __KERNEL__ | ||
330 | /* | ||
331 | * Kernel-internal data types and definitions: | ||
332 | */ | ||
333 | |||
334 | #ifdef CONFIG_PERF_COUNTERS | ||
335 | # include <asm/perf_counter.h> | ||
336 | #endif | ||
337 | |||
338 | #include <linux/list.h> | ||
339 | #include <linux/mutex.h> | ||
340 | #include <linux/rculist.h> | ||
341 | #include <linux/rcupdate.h> | ||
342 | #include <linux/spinlock.h> | ||
343 | #include <linux/hrtimer.h> | ||
344 | #include <linux/fs.h> | ||
345 | #include <linux/pid_namespace.h> | ||
346 | #include <asm/atomic.h> | ||
347 | |||
348 | struct task_struct; | ||
349 | |||
350 | /** | ||
351 | * struct hw_perf_counter - performance counter hardware details: | ||
352 | */ | ||
353 | struct hw_perf_counter { | ||
354 | #ifdef CONFIG_PERF_COUNTERS | ||
355 | union { | ||
356 | struct { /* hardware */ | ||
357 | u64 config; | ||
358 | unsigned long config_base; | ||
359 | unsigned long counter_base; | ||
360 | int idx; | ||
361 | }; | ||
362 | union { /* software */ | ||
363 | atomic64_t count; | ||
364 | struct hrtimer hrtimer; | ||
365 | }; | ||
366 | }; | ||
367 | atomic64_t prev_count; | ||
368 | u64 sample_period; | ||
369 | u64 last_period; | ||
370 | atomic64_t period_left; | ||
371 | u64 interrupts; | ||
372 | |||
373 | u64 freq_count; | ||
374 | u64 freq_interrupts; | ||
375 | u64 freq_stamp; | ||
376 | #endif | ||
377 | }; | ||
378 | |||
379 | struct perf_counter; | ||
380 | |||
381 | /** | ||
382 | * struct pmu - generic performance monitoring unit | ||
383 | */ | ||
384 | struct pmu { | ||
385 | int (*enable) (struct perf_counter *counter); | ||
386 | void (*disable) (struct perf_counter *counter); | ||
387 | void (*read) (struct perf_counter *counter); | ||
388 | void (*unthrottle) (struct perf_counter *counter); | ||
389 | }; | ||
390 | |||
391 | /** | ||
392 | * enum perf_counter_active_state - the states of a counter | ||
393 | */ | ||
394 | enum perf_counter_active_state { | ||
395 | PERF_COUNTER_STATE_ERROR = -2, | ||
396 | PERF_COUNTER_STATE_OFF = -1, | ||
397 | PERF_COUNTER_STATE_INACTIVE = 0, | ||
398 | PERF_COUNTER_STATE_ACTIVE = 1, | ||
399 | }; | ||
400 | |||
401 | struct file; | ||
402 | |||
403 | struct perf_mmap_data { | ||
404 | struct rcu_head rcu_head; | ||
405 | int nr_pages; /* nr of data pages */ | ||
406 | int nr_locked; /* nr pages mlocked */ | ||
407 | |||
408 | atomic_t poll; /* POLL_ for wakeups */ | ||
409 | atomic_t events; /* event limit */ | ||
410 | |||
411 | atomic_long_t head; /* write position */ | ||
412 | atomic_long_t done_head; /* completed head */ | ||
413 | |||
414 | atomic_t lock; /* concurrent writes */ | ||
415 | |||
416 | atomic_t wakeup; /* needs a wakeup */ | ||
417 | |||
418 | struct perf_counter_mmap_page *user_page; | ||
419 | void *data_pages[0]; | ||
420 | }; | ||
421 | |||
422 | struct perf_pending_entry { | ||
423 | struct perf_pending_entry *next; | ||
424 | void (*func)(struct perf_pending_entry *); | ||
425 | }; | ||
426 | |||
427 | /** | ||
428 | * struct perf_counter - performance counter kernel representation: | ||
429 | */ | ||
430 | struct perf_counter { | ||
431 | #ifdef CONFIG_PERF_COUNTERS | ||
432 | struct list_head list_entry; | ||
433 | struct list_head event_entry; | ||
434 | struct list_head sibling_list; | ||
435 | int nr_siblings; | ||
436 | struct perf_counter *group_leader; | ||
437 | const struct pmu *pmu; | ||
438 | |||
439 | enum perf_counter_active_state state; | ||
440 | atomic64_t count; | ||
441 | |||
442 | /* | ||
443 | * These are the total time in nanoseconds that the counter | ||
444 | * has been enabled (i.e. eligible to run, and the task has | ||
445 | * been scheduled in, if this is a per-task counter) | ||
446 | * and running (scheduled onto the CPU), respectively. | ||
447 | * | ||
448 | * They are computed from tstamp_enabled, tstamp_running and | ||
449 | * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. | ||
450 | */ | ||
451 | u64 total_time_enabled; | ||
452 | u64 total_time_running; | ||
453 | |||
454 | /* | ||
455 | * These are timestamps used for computing total_time_enabled | ||
456 | * and total_time_running when the counter is in INACTIVE or | ||
457 | * ACTIVE state, measured in nanoseconds from an arbitrary point | ||
458 | * in time. | ||
459 | * tstamp_enabled: the notional time when the counter was enabled | ||
460 | * tstamp_running: the notional time when the counter was scheduled on | ||
461 | * tstamp_stopped: in INACTIVE state, the notional time when the | ||
462 | * counter was scheduled off. | ||
463 | */ | ||
464 | u64 tstamp_enabled; | ||
465 | u64 tstamp_running; | ||
466 | u64 tstamp_stopped; | ||
467 | |||
468 | struct perf_counter_attr attr; | ||
469 | struct hw_perf_counter hw; | ||
470 | |||
471 | struct perf_counter_context *ctx; | ||
472 | struct file *filp; | ||
473 | |||
474 | /* | ||
475 | * These accumulate total time (in nanoseconds) that children | ||
476 | * counters have been enabled and running, respectively. | ||
477 | */ | ||
478 | atomic64_t child_total_time_enabled; | ||
479 | atomic64_t child_total_time_running; | ||
480 | |||
481 | /* | ||
482 | * Protect attach/detach and child_list: | ||
483 | */ | ||
484 | struct mutex child_mutex; | ||
485 | struct list_head child_list; | ||
486 | struct perf_counter *parent; | ||
487 | |||
488 | int oncpu; | ||
489 | int cpu; | ||
490 | |||
491 | struct list_head owner_entry; | ||
492 | struct task_struct *owner; | ||
493 | |||
494 | /* mmap bits */ | ||
495 | struct mutex mmap_mutex; | ||
496 | atomic_t mmap_count; | ||
497 | struct perf_mmap_data *data; | ||
498 | |||
499 | /* poll related */ | ||
500 | wait_queue_head_t waitq; | ||
501 | struct fasync_struct *fasync; | ||
502 | |||
503 | /* delayed work for NMIs and such */ | ||
504 | int pending_wakeup; | ||
505 | int pending_kill; | ||
506 | int pending_disable; | ||
507 | struct perf_pending_entry pending; | ||
508 | |||
509 | atomic_t event_limit; | ||
510 | |||
511 | void (*destroy)(struct perf_counter *); | ||
512 | struct rcu_head rcu_head; | ||
513 | |||
514 | struct pid_namespace *ns; | ||
515 | u64 id; | ||
516 | #endif | ||
517 | }; | ||
518 | |||
519 | /** | ||
520 | * struct perf_counter_context - counter context structure | ||
521 | * | ||
522 | * Used as a container for task counters and CPU counters as well: | ||
523 | */ | ||
524 | struct perf_counter_context { | ||
525 | /* | ||
526 | * Protect the states of the counters in the list, | ||
527 | * nr_active, and the list: | ||
528 | */ | ||
529 | spinlock_t lock; | ||
530 | /* | ||
531 | * Protect the list of counters. Locking either mutex or lock | ||
532 | * is sufficient to ensure the list doesn't change; to change | ||
533 | * the list you need to lock both the mutex and the spinlock. | ||
534 | */ | ||
535 | struct mutex mutex; | ||
536 | |||
537 | struct list_head counter_list; | ||
538 | struct list_head event_list; | ||
539 | int nr_counters; | ||
540 | int nr_active; | ||
541 | int is_active; | ||
542 | atomic_t refcount; | ||
543 | struct task_struct *task; | ||
544 | |||
545 | /* | ||
546 | * Context clock, runs when context enabled. | ||
547 | */ | ||
548 | u64 time; | ||
549 | u64 timestamp; | ||
550 | |||
551 | /* | ||
552 | * These fields let us detect when two contexts have both | ||
553 | * been cloned (inherited) from a common ancestor. | ||
554 | */ | ||
555 | struct perf_counter_context *parent_ctx; | ||
556 | u64 parent_gen; | ||
557 | u64 generation; | ||
558 | int pin_count; | ||
559 | struct rcu_head rcu_head; | ||
560 | }; | ||
561 | |||
562 | /** | ||
563 | * struct perf_cpu_context - per cpu counter context structure | |
564 | */ | ||
565 | struct perf_cpu_context { | ||
566 | struct perf_counter_context ctx; | ||
567 | struct perf_counter_context *task_ctx; | ||
568 | int active_oncpu; | ||
569 | int max_pertask; | ||
570 | int exclusive; | ||
571 | |||
572 | /* | ||
573 | * Recursion avoidance: | ||
574 | * | ||
575 | * task, softirq, irq, nmi context | ||
576 | */ | ||
577 | int recursion[4]; | ||
578 | }; | ||
579 | |||
580 | #ifdef CONFIG_PERF_COUNTERS | ||
581 | |||
582 | /* | ||
583 | * Set by architecture code: | ||
584 | */ | ||
585 | extern int perf_max_counters; | ||
586 | |||
587 | extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); | ||
588 | |||
589 | extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); | ||
590 | extern void perf_counter_task_sched_out(struct task_struct *task, | ||
591 | struct task_struct *next, int cpu); | ||
592 | extern void perf_counter_task_tick(struct task_struct *task, int cpu); | ||
593 | extern int perf_counter_init_task(struct task_struct *child); | ||
594 | extern void perf_counter_exit_task(struct task_struct *child); | ||
595 | extern void perf_counter_free_task(struct task_struct *task); | ||
596 | extern void perf_counter_do_pending(void); | ||
597 | extern void perf_counter_print_debug(void); | ||
598 | extern void __perf_disable(void); | ||
599 | extern bool __perf_enable(void); | ||
600 | extern void perf_disable(void); | ||
601 | extern void perf_enable(void); | ||
602 | extern int perf_counter_task_disable(void); | ||
603 | extern int perf_counter_task_enable(void); | ||
604 | extern int hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
605 | struct perf_cpu_context *cpuctx, | ||
606 | struct perf_counter_context *ctx, int cpu); | ||
607 | extern void perf_counter_update_userpage(struct perf_counter *counter); | ||
608 | |||
609 | struct perf_sample_data { | ||
610 | struct pt_regs *regs; | ||
611 | u64 addr; | ||
612 | u64 period; | ||
613 | }; | ||
614 | |||
615 | extern int perf_counter_overflow(struct perf_counter *counter, int nmi, | ||
616 | struct perf_sample_data *data); | ||
617 | |||
618 | /* | ||
619 | * Return 1 for a software counter, 0 for a hardware counter | ||
620 | */ | ||
621 | static inline int is_software_counter(struct perf_counter *counter) | ||
622 | { | ||
623 | return (counter->attr.type != PERF_TYPE_RAW) && | ||
624 | (counter->attr.type != PERF_TYPE_HARDWARE); | ||
625 | } | ||
626 | |||
627 | extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); | ||
628 | |||
629 | extern void __perf_counter_mmap(struct vm_area_struct *vma); | ||
630 | |||
631 | static inline void perf_counter_mmap(struct vm_area_struct *vma) | ||
632 | { | ||
633 | if (vma->vm_flags & VM_EXEC) | ||
634 | __perf_counter_mmap(vma); | ||
635 | } | ||
636 | |||
637 | extern void perf_counter_comm(struct task_struct *tsk); | ||
638 | extern void perf_counter_fork(struct task_struct *tsk); | ||
639 | |||
640 | extern void perf_counter_task_migration(struct task_struct *task, int cpu); | ||
641 | |||
642 | #define MAX_STACK_DEPTH 255 | ||
643 | |||
644 | struct perf_callchain_entry { | ||
645 | u16 nr; | ||
646 | u16 hv; | ||
647 | u16 kernel; | ||
648 | u16 user; | ||
649 | u64 ip[MAX_STACK_DEPTH]; | ||
650 | }; | ||
651 | |||
652 | extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); | ||
653 | |||
654 | extern int sysctl_perf_counter_paranoid; | ||
655 | extern int sysctl_perf_counter_mlock; | ||
656 | extern int sysctl_perf_counter_sample_rate; | ||
657 | |||
658 | extern void perf_counter_init(void); | ||
659 | |||
660 | #ifndef perf_misc_flags | ||
661 | #define perf_misc_flags(regs) (user_mode(regs) ? PERF_EVENT_MISC_USER : \ | ||
662 | PERF_EVENT_MISC_KERNEL) | ||
663 | #define perf_instruction_pointer(regs) instruction_pointer(regs) | ||
664 | #endif | ||
665 | |||
666 | #else | ||
667 | static inline void | ||
668 | perf_counter_task_sched_in(struct task_struct *task, int cpu) { } | ||
669 | static inline void | ||
670 | perf_counter_task_sched_out(struct task_struct *task, | ||
671 | struct task_struct *next, int cpu) { } | ||
672 | static inline void | ||
673 | perf_counter_task_tick(struct task_struct *task, int cpu) { } | ||
674 | static inline int perf_counter_init_task(struct task_struct *child) { return 0; } | ||
675 | static inline void perf_counter_exit_task(struct task_struct *child) { } | ||
676 | static inline void perf_counter_free_task(struct task_struct *task) { } | ||
677 | static inline void perf_counter_do_pending(void) { } | ||
678 | static inline void perf_counter_print_debug(void) { } | ||
679 | static inline void perf_disable(void) { } | ||
680 | static inline void perf_enable(void) { } | ||
681 | static inline int perf_counter_task_disable(void) { return -EINVAL; } | ||
682 | static inline int perf_counter_task_enable(void) { return -EINVAL; } | ||
683 | |||
684 | static inline void | ||
685 | perf_swcounter_event(u32 event, u64 nr, int nmi, | ||
686 | struct pt_regs *regs, u64 addr) { } | ||
687 | |||
688 | static inline void perf_counter_mmap(struct vm_area_struct *vma) { } | ||
689 | static inline void perf_counter_comm(struct task_struct *tsk) { } | ||
690 | static inline void perf_counter_fork(struct task_struct *tsk) { } | ||
691 | static inline void perf_counter_init(void) { } | ||
692 | static inline void perf_counter_task_migration(struct task_struct *task, | ||
693 | int cpu) { } | ||
694 | #endif | ||
695 | |||
696 | #endif /* __KERNEL__ */ | ||
697 | #endif /* _LINUX_PERF_COUNTER_H */ | ||
diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 48d887e3c6e7..b00df4c79c63 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h | |||
@@ -85,4 +85,7 @@ | |||
85 | #define PR_SET_TIMERSLACK 29 | 85 | #define PR_SET_TIMERSLACK 29 |
86 | #define PR_GET_TIMERSLACK 30 | 86 | #define PR_GET_TIMERSLACK 30 |
87 | 87 | ||
88 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
89 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
90 | |||
88 | #endif /* _LINUX_PRCTL_H */ | 91 | #endif /* _LINUX_PRCTL_H */ |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 42bf2766111e..4896fdfec913 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -99,6 +99,7 @@ struct robust_list_head; | |||
99 | struct bio; | 99 | struct bio; |
100 | struct fs_struct; | 100 | struct fs_struct; |
101 | struct bts_context; | 101 | struct bts_context; |
102 | struct perf_counter_context; | ||
102 | 103 | ||
103 | /* | 104 | /* |
104 | * List of flags we want to share for kernel threads, | 105 | * List of flags we want to share for kernel threads, |
@@ -139,6 +140,7 @@ extern unsigned long nr_running(void); | |||
139 | extern unsigned long nr_uninterruptible(void); | 140 | extern unsigned long nr_uninterruptible(void); |
140 | extern unsigned long nr_iowait(void); | 141 | extern unsigned long nr_iowait(void); |
141 | extern void calc_global_load(void); | 142 | extern void calc_global_load(void); |
143 | extern u64 cpu_nr_migrations(int cpu); | ||
142 | 144 | ||
143 | extern unsigned long get_parent_ip(unsigned long addr); | 145 | extern unsigned long get_parent_ip(unsigned long addr); |
144 | 146 | ||
@@ -674,6 +676,10 @@ struct user_struct { | |||
674 | struct work_struct work; | 676 | struct work_struct work; |
675 | #endif | 677 | #endif |
676 | #endif | 678 | #endif |
679 | |||
680 | #ifdef CONFIG_PERF_COUNTERS | ||
681 | atomic_long_t locked_vm; | ||
682 | #endif | ||
677 | }; | 683 | }; |
678 | 684 | ||
679 | extern int uids_sysfs_init(void); | 685 | extern int uids_sysfs_init(void); |
@@ -1073,9 +1079,10 @@ struct sched_entity { | |||
1073 | u64 last_wakeup; | 1079 | u64 last_wakeup; |
1074 | u64 avg_overlap; | 1080 | u64 avg_overlap; |
1075 | 1081 | ||
1082 | u64 nr_migrations; | ||
1083 | |||
1076 | u64 start_runtime; | 1084 | u64 start_runtime; |
1077 | u64 avg_wakeup; | 1085 | u64 avg_wakeup; |
1078 | u64 nr_migrations; | ||
1079 | 1086 | ||
1080 | #ifdef CONFIG_SCHEDSTATS | 1087 | #ifdef CONFIG_SCHEDSTATS |
1081 | u64 wait_start; | 1088 | u64 wait_start; |
@@ -1396,6 +1403,11 @@ struct task_struct { | |||
1396 | struct list_head pi_state_list; | 1403 | struct list_head pi_state_list; |
1397 | struct futex_pi_state *pi_state_cache; | 1404 | struct futex_pi_state *pi_state_cache; |
1398 | #endif | 1405 | #endif |
1406 | #ifdef CONFIG_PERF_COUNTERS | ||
1407 | struct perf_counter_context *perf_counter_ctxp; | ||
1408 | struct mutex perf_counter_mutex; | ||
1409 | struct list_head perf_counter_list; | ||
1410 | #endif | ||
1399 | #ifdef CONFIG_NUMA | 1411 | #ifdef CONFIG_NUMA |
1400 | struct mempolicy *mempolicy; | 1412 | struct mempolicy *mempolicy; |
1401 | short il_next; | 1413 | short il_next; |
@@ -2410,6 +2422,13 @@ static inline void inc_syscw(struct task_struct *tsk) | |||
2410 | #define TASK_SIZE_OF(tsk) TASK_SIZE | 2422 | #define TASK_SIZE_OF(tsk) TASK_SIZE |
2411 | #endif | 2423 | #endif |
2412 | 2424 | ||
2425 | /* | ||
2426 | * Call the function if the target task is executing on a CPU right now: | ||
2427 | */ | ||
2428 | extern void task_oncpu_function_call(struct task_struct *p, | ||
2429 | void (*func) (void *info), void *info); | ||
2430 | |||
2431 | |||
2413 | #ifdef CONFIG_MM_OWNER | 2432 | #ifdef CONFIG_MM_OWNER |
2414 | extern void mm_update_next_owner(struct mm_struct *mm); | 2433 | extern void mm_update_next_owner(struct mm_struct *mm); |
2415 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); | 2434 | extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); |
diff --git a/include/linux/slab.h b/include/linux/slab.h index 24c5602bee99..48803064cedf 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h | |||
@@ -62,6 +62,8 @@ | |||
62 | # define SLAB_DEBUG_OBJECTS 0x00000000UL | 62 | # define SLAB_DEBUG_OBJECTS 0x00000000UL |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | #define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ | ||
66 | |||
65 | /* The following flags affect the page allocator grouping pages by mobility */ | 67 | /* The following flags affect the page allocator grouping pages by mobility */ |
66 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ | 68 | #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ |
67 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ | 69 | #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ |
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 30520844b8da..c6c84ad8bd71 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h | |||
@@ -55,6 +55,7 @@ struct compat_timeval; | |||
55 | struct robust_list_head; | 55 | struct robust_list_head; |
56 | struct getcpu_cache; | 56 | struct getcpu_cache; |
57 | struct old_linux_dirent; | 57 | struct old_linux_dirent; |
58 | struct perf_counter_attr; | ||
58 | 59 | ||
59 | #include <linux/types.h> | 60 | #include <linux/types.h> |
60 | #include <linux/aio_abi.h> | 61 | #include <linux/aio_abi.h> |
@@ -755,4 +756,8 @@ asmlinkage long sys_pipe(int __user *); | |||
755 | 756 | ||
756 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); | 757 | int kernel_execve(const char *filename, char *const argv[], char *const envp[]); |
757 | 758 | ||
759 | |||
760 | asmlinkage long sys_perf_counter_open( | ||
761 | const struct perf_counter_attr __user *attr_uptr, | ||
762 | pid_t pid, int cpu, int group_fd, unsigned long flags); | ||
758 | #endif | 763 | #endif |
diff --git a/init/Kconfig b/init/Kconfig index 5de1c17c51ed..c649657e2259 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -934,6 +934,40 @@ config AIO | |||
934 | by some high performance threaded applications. Disabling | 934 | by some high performance threaded applications. Disabling |
935 | this option saves about 7k. | 935 | this option saves about 7k. |
936 | 936 | ||
937 | config HAVE_PERF_COUNTERS | ||
938 | bool | ||
939 | |||
940 | menu "Performance Counters" | ||
941 | |||
942 | config PERF_COUNTERS | ||
943 | bool "Kernel Performance Counters" | ||
944 | depends on HAVE_PERF_COUNTERS | ||
945 | select ANON_INODES | ||
946 | help | ||
947 | Enable kernel support for performance counter hardware. | ||
948 | |||
949 | Performance counters are special hardware registers available | ||
950 | on most modern CPUs. These registers count the number of certain | ||
951 | types of hw events: such as instructions executed, cache misses | |
952 | suffered, or branches mis-predicted - without slowing down the | ||
953 | kernel or applications. These registers can also trigger interrupts | ||
954 | when a threshold number of events have passed - and can thus be | ||
955 | used to profile the code that runs on that CPU. | ||
956 | |||
957 | The Linux Performance Counter subsystem provides an abstraction of | ||
958 | these hardware capabilities, available via a system call. It | ||
959 | provides per task and per CPU counters, and it provides event | ||
960 | capabilities on top of those. | ||
961 | |||
962 | Say Y if unsure. | ||
963 | |||
964 | config EVENT_PROFILE | ||
965 | bool "Tracepoint profile sources" | ||
966 | depends on PERF_COUNTERS && EVENT_TRACER | ||
967 | default y | ||
968 | |||
969 | endmenu | ||
970 | |||
937 | config VM_EVENT_COUNTERS | 971 | config VM_EVENT_COUNTERS |
938 | default y | 972 | default y |
939 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED | 973 | bool "Enable VM event counters for /proc/vmstat" if EMBEDDED |
diff --git a/init/main.c b/init/main.c index bb7dc57eee36..5616661eac01 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -56,6 +56,7 @@ | |||
56 | #include <linux/debug_locks.h> | 56 | #include <linux/debug_locks.h> |
57 | #include <linux/debugobjects.h> | 57 | #include <linux/debugobjects.h> |
58 | #include <linux/lockdep.h> | 58 | #include <linux/lockdep.h> |
59 | #include <linux/kmemleak.h> | ||
59 | #include <linux/pid_namespace.h> | 60 | #include <linux/pid_namespace.h> |
60 | #include <linux/device.h> | 61 | #include <linux/device.h> |
61 | #include <linux/kthread.h> | 62 | #include <linux/kthread.h> |
@@ -533,6 +534,16 @@ void __init __weak thread_info_cache_init(void) | |||
533 | { | 534 | { |
534 | } | 535 | } |
535 | 536 | ||
537 | /* | ||
538 | * Set up kernel memory allocators | ||
539 | */ | ||
540 | static void __init mm_init(void) | ||
541 | { | ||
542 | mem_init(); | ||
543 | kmem_cache_init(); | ||
544 | vmalloc_init(); | ||
545 | } | ||
546 | |||
536 | asmlinkage void __init start_kernel(void) | 547 | asmlinkage void __init start_kernel(void) |
537 | { | 548 | { |
538 | char * command_line; | 549 | char * command_line; |
@@ -574,6 +585,23 @@ asmlinkage void __init start_kernel(void) | |||
574 | setup_nr_cpu_ids(); | 585 | setup_nr_cpu_ids(); |
575 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ | 586 | smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ |
576 | 587 | ||
588 | build_all_zonelists(); | ||
589 | page_alloc_init(); | ||
590 | |||
591 | printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); | ||
592 | parse_early_param(); | ||
593 | parse_args("Booting kernel", static_command_line, __start___param, | ||
594 | __stop___param - __start___param, | ||
595 | &unknown_bootoption); | ||
596 | /* | ||
597 | * These use large bootmem allocations and must precede | ||
598 | * kmem_cache_init() | ||
599 | */ | ||
600 | pidhash_init(); | ||
601 | vfs_caches_init_early(); | ||
602 | sort_main_extable(); | ||
603 | trap_init(); | ||
604 | mm_init(); | ||
577 | /* | 605 | /* |
578 | * Set up the scheduler prior to starting any interrupts (such as the | 606 | * Set up the scheduler prior to starting any interrupts (such as the |
579 | * timer interrupt). Full topology setup happens at smp_init() | 607 | * timer interrupt). Full topology setup happens at smp_init() |
@@ -585,25 +613,16 @@ asmlinkage void __init start_kernel(void) | |||
585 | * fragile until we cpu_idle() for the first time. | 613 | * fragile until we cpu_idle() for the first time. |
586 | */ | 614 | */ |
587 | preempt_disable(); | 615 | preempt_disable(); |
588 | build_all_zonelists(); | ||
589 | page_alloc_init(); | ||
590 | printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); | ||
591 | parse_early_param(); | ||
592 | parse_args("Booting kernel", static_command_line, __start___param, | ||
593 | __stop___param - __start___param, | ||
594 | &unknown_bootoption); | ||
595 | if (!irqs_disabled()) { | 616 | if (!irqs_disabled()) { |
596 | printk(KERN_WARNING "start_kernel(): bug: interrupts were " | 617 | printk(KERN_WARNING "start_kernel(): bug: interrupts were " |
597 | "enabled *very* early, fixing it\n"); | 618 | "enabled *very* early, fixing it\n"); |
598 | local_irq_disable(); | 619 | local_irq_disable(); |
599 | } | 620 | } |
600 | sort_main_extable(); | ||
601 | trap_init(); | ||
602 | rcu_init(); | 621 | rcu_init(); |
603 | /* init some links before init_ISA_irqs() */ | 622 | /* init some links before init_ISA_irqs() */ |
604 | early_irq_init(); | 623 | early_irq_init(); |
605 | init_IRQ(); | 624 | init_IRQ(); |
606 | pidhash_init(); | 625 | prio_tree_init(); |
607 | init_timers(); | 626 | init_timers(); |
608 | hrtimers_init(); | 627 | hrtimers_init(); |
609 | softirq_init(); | 628 | softirq_init(); |
@@ -645,15 +664,12 @@ asmlinkage void __init start_kernel(void) | |||
645 | initrd_start = 0; | 664 | initrd_start = 0; |
646 | } | 665 | } |
647 | #endif | 666 | #endif |
648 | vmalloc_init(); | ||
649 | vfs_caches_init_early(); | ||
650 | cpuset_init_early(); | 667 | cpuset_init_early(); |
651 | page_cgroup_init(); | 668 | page_cgroup_init(); |
652 | mem_init(); | ||
653 | enable_debug_pagealloc(); | 669 | enable_debug_pagealloc(); |
654 | cpu_hotplug_init(); | 670 | cpu_hotplug_init(); |
655 | kmem_cache_init(); | ||
656 | kmemtrace_init(); | 671 | kmemtrace_init(); |
672 | kmemleak_init(); | ||
657 | debug_objects_mem_init(); | 673 | debug_objects_mem_init(); |
658 | idr_init_cache(); | 674 | idr_init_cache(); |
659 | setup_per_cpu_pageset(); | 675 | setup_per_cpu_pageset(); |
@@ -663,7 +679,6 @@ asmlinkage void __init start_kernel(void) | |||
663 | calibrate_delay(); | 679 | calibrate_delay(); |
664 | pidmap_init(); | 680 | pidmap_init(); |
665 | pgtable_cache_init(); | 681 | pgtable_cache_init(); |
666 | prio_tree_init(); | ||
667 | anon_vma_init(); | 682 | anon_vma_init(); |
668 | #ifdef CONFIG_X86 | 683 | #ifdef CONFIG_X86 |
669 | if (efi_enabled) | 684 | if (efi_enabled) |
diff --git a/kernel/Makefile b/kernel/Makefile index a35eee3436de..90b53f6dc226 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -96,6 +96,7 @@ obj-$(CONFIG_TRACING) += trace/ | |||
96 | obj-$(CONFIG_X86_DS) += trace/ | 96 | obj-$(CONFIG_X86_DS) += trace/ |
97 | obj-$(CONFIG_SMP) += sched_cpupri.o | 97 | obj-$(CONFIG_SMP) += sched_cpupri.o |
98 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 98 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
99 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o | ||
99 | 100 | ||
100 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 101 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
101 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 102 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 026faccca869..d5a7e17474ee 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = { | |||
1857 | 1857 | ||
1858 | int __init cpuset_init_early(void) | 1858 | int __init cpuset_init_early(void) |
1859 | { | 1859 | { |
1860 | alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); | 1860 | alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT); |
1861 | 1861 | ||
1862 | top_cpuset.mems_generation = cpuset_mems_generation++; | 1862 | top_cpuset.mems_generation = cpuset_mems_generation++; |
1863 | return 0; | 1863 | return 0; |
diff --git a/kernel/exit.c b/kernel/exit.c index 51d1fe3fb7ad..b6c90b5ef509 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/tracehook.h> | 48 | #include <linux/tracehook.h> |
49 | #include <linux/fs_struct.h> | 49 | #include <linux/fs_struct.h> |
50 | #include <linux/init_task.h> | 50 | #include <linux/init_task.h> |
51 | #include <linux/perf_counter.h> | ||
51 | #include <trace/events/sched.h> | 52 | #include <trace/events/sched.h> |
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
@@ -154,6 +155,9 @@ static void delayed_put_task_struct(struct rcu_head *rhp) | |||
154 | { | 155 | { |
155 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); | 156 | struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); |
156 | 157 | ||
158 | #ifdef CONFIG_PERF_COUNTERS | ||
159 | WARN_ON_ONCE(tsk->perf_counter_ctxp); | ||
160 | #endif | ||
157 | trace_sched_process_free(tsk); | 161 | trace_sched_process_free(tsk); |
158 | put_task_struct(tsk); | 162 | put_task_struct(tsk); |
159 | } | 163 | } |
@@ -170,6 +174,7 @@ repeat: | |||
170 | atomic_dec(&__task_cred(p)->user->processes); | 174 | atomic_dec(&__task_cred(p)->user->processes); |
171 | 175 | ||
172 | proc_flush_task(p); | 176 | proc_flush_task(p); |
177 | |||
173 | write_lock_irq(&tasklist_lock); | 178 | write_lock_irq(&tasklist_lock); |
174 | tracehook_finish_release_task(p); | 179 | tracehook_finish_release_task(p); |
175 | __exit_signal(p); | 180 | __exit_signal(p); |
@@ -971,16 +976,19 @@ NORET_TYPE void do_exit(long code) | |||
971 | module_put(tsk->binfmt->module); | 976 | module_put(tsk->binfmt->module); |
972 | 977 | ||
973 | proc_exit_connector(tsk); | 978 | proc_exit_connector(tsk); |
979 | |||
980 | /* | ||
981 | * Flush inherited counters to the parent - before the parent | ||
982 | * gets woken up by child-exit notifications. | ||
983 | */ | ||
984 | perf_counter_exit_task(tsk); | ||
985 | |||
974 | exit_notify(tsk, group_dead); | 986 | exit_notify(tsk, group_dead); |
975 | #ifdef CONFIG_NUMA | 987 | #ifdef CONFIG_NUMA |
976 | mpol_put(tsk->mempolicy); | 988 | mpol_put(tsk->mempolicy); |
977 | tsk->mempolicy = NULL; | 989 | tsk->mempolicy = NULL; |
978 | #endif | 990 | #endif |
979 | #ifdef CONFIG_FUTEX | 991 | #ifdef CONFIG_FUTEX |
980 | /* | ||
981 | * This must happen late, after the PID is not | ||
982 | * hashed anymore: | ||
983 | */ | ||
984 | if (unlikely(!list_empty(&tsk->pi_state_list))) | 992 | if (unlikely(!list_empty(&tsk->pi_state_list))) |
985 | exit_pi_state_list(tsk); | 993 | exit_pi_state_list(tsk); |
986 | if (unlikely(current->pi_state_cache)) | 994 | if (unlikely(current->pi_state_cache)) |
diff --git a/kernel/fork.c b/kernel/fork.c index bb762b4dd217..4430eb1376f2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <linux/blkdev.h> | 62 | #include <linux/blkdev.h> |
63 | #include <linux/fs_struct.h> | 63 | #include <linux/fs_struct.h> |
64 | #include <linux/magic.h> | 64 | #include <linux/magic.h> |
65 | #include <linux/perf_counter.h> | ||
65 | 66 | ||
66 | #include <asm/pgtable.h> | 67 | #include <asm/pgtable.h> |
67 | #include <asm/pgalloc.h> | 68 | #include <asm/pgalloc.h> |
@@ -1096,6 +1097,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1096 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1097 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1097 | sched_fork(p, clone_flags); | 1098 | sched_fork(p, clone_flags); |
1098 | 1099 | ||
1100 | retval = perf_counter_init_task(p); | ||
1101 | if (retval) | ||
1102 | goto bad_fork_cleanup_policy; | ||
1103 | |||
1099 | if ((retval = audit_alloc(p))) | 1104 | if ((retval = audit_alloc(p))) |
1100 | goto bad_fork_cleanup_policy; | 1105 | goto bad_fork_cleanup_policy; |
1101 | /* copy all the process information */ | 1106 | /* copy all the process information */ |
@@ -1290,6 +1295,7 @@ bad_fork_cleanup_semundo: | |||
1290 | bad_fork_cleanup_audit: | 1295 | bad_fork_cleanup_audit: |
1291 | audit_free(p); | 1296 | audit_free(p); |
1292 | bad_fork_cleanup_policy: | 1297 | bad_fork_cleanup_policy: |
1298 | perf_counter_free_task(p); | ||
1293 | #ifdef CONFIG_NUMA | 1299 | #ifdef CONFIG_NUMA |
1294 | mpol_put(p->mempolicy); | 1300 | mpol_put(p->mempolicy); |
1295 | bad_fork_cleanup_cgroup: | 1301 | bad_fork_cleanup_cgroup: |
@@ -1403,6 +1409,12 @@ long do_fork(unsigned long clone_flags, | |||
1403 | if (clone_flags & CLONE_VFORK) { | 1409 | if (clone_flags & CLONE_VFORK) { |
1404 | p->vfork_done = &vfork; | 1410 | p->vfork_done = &vfork; |
1405 | init_completion(&vfork); | 1411 | init_completion(&vfork); |
1412 | } else if (!(clone_flags & CLONE_VM)) { | ||
1413 | /* | ||
1414 | * vfork will do an exec which will call | ||
1415 | * set_task_comm() | ||
1416 | */ | ||
1417 | perf_counter_fork(p); | ||
1406 | } | 1418 | } |
1407 | 1419 | ||
1408 | audit_finish_fork(p); | 1420 | audit_finish_fork(p); |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index a60018402f42..104578541230 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -150,6 +150,7 @@ int __init early_irq_init(void) | |||
150 | { | 150 | { |
151 | struct irq_desc *desc; | 151 | struct irq_desc *desc; |
152 | int legacy_count; | 152 | int legacy_count; |
153 | int node; | ||
153 | int i; | 154 | int i; |
154 | 155 | ||
155 | init_irq_default_affinity(); | 156 | init_irq_default_affinity(); |
@@ -160,20 +161,20 @@ int __init early_irq_init(void) | |||
160 | 161 | ||
161 | desc = irq_desc_legacy; | 162 | desc = irq_desc_legacy; |
162 | legacy_count = ARRAY_SIZE(irq_desc_legacy); | 163 | legacy_count = ARRAY_SIZE(irq_desc_legacy); |
164 | node = first_online_node; | ||
163 | 165 | ||
164 | /* allocate irq_desc_ptrs array based on nr_irqs */ | 166 | /* allocate irq_desc_ptrs array based on nr_irqs */ |
165 | irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); | 167 | irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT); |
166 | 168 | ||
167 | /* allocate based on nr_cpu_ids */ | 169 | /* allocate based on nr_cpu_ids */ |
168 | /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ | 170 | kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * |
169 | kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * | 171 | sizeof(int), GFP_NOWAIT, node); |
170 | sizeof(int)); | ||
171 | 172 | ||
172 | for (i = 0; i < legacy_count; i++) { | 173 | for (i = 0; i < legacy_count; i++) { |
173 | desc[i].irq = i; | 174 | desc[i].irq = i; |
174 | desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; | 175 | desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; |
175 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); | 176 | lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); |
176 | alloc_desc_masks(&desc[i], 0, true); | 177 | alloc_desc_masks(&desc[i], node, true); |
177 | init_desc_masks(&desc[i]); | 178 | init_desc_masks(&desc[i]); |
178 | irq_desc_ptrs[i] = desc + i; | 179 | irq_desc_ptrs[i] = desc + i; |
179 | } | 180 | } |
diff --git a/kernel/module.c b/kernel/module.c index 278e9b6762bb..35f7de00bf0d 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <linux/ftrace.h> | 53 | #include <linux/ftrace.h> |
54 | #include <linux/async.h> | 54 | #include <linux/async.h> |
55 | #include <linux/percpu.h> | 55 | #include <linux/percpu.h> |
56 | #include <linux/kmemleak.h> | ||
56 | 57 | ||
57 | #if 0 | 58 | #if 0 |
58 | #define DEBUGP printk | 59 | #define DEBUGP printk |
@@ -433,6 +434,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, | |||
433 | unsigned long extra; | 434 | unsigned long extra; |
434 | unsigned int i; | 435 | unsigned int i; |
435 | void *ptr; | 436 | void *ptr; |
437 | int cpu; | ||
436 | 438 | ||
437 | if (align > PAGE_SIZE) { | 439 | if (align > PAGE_SIZE) { |
438 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", | 440 | printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", |
@@ -462,6 +464,11 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, | |||
462 | if (!split_block(i, size)) | 464 | if (!split_block(i, size)) |
463 | return NULL; | 465 | return NULL; |
464 | 466 | ||
467 | /* add the per-cpu scanning areas */ | ||
468 | for_each_possible_cpu(cpu) | ||
469 | kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0, | ||
470 | GFP_KERNEL); | ||
471 | |||
465 | /* Mark allocated */ | 472 | /* Mark allocated */ |
466 | pcpu_size[i] = -pcpu_size[i]; | 473 | pcpu_size[i] = -pcpu_size[i]; |
467 | return ptr; | 474 | return ptr; |
@@ -476,6 +483,7 @@ static void percpu_modfree(void *freeme) | |||
476 | { | 483 | { |
477 | unsigned int i; | 484 | unsigned int i; |
478 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); | 485 | void *ptr = __per_cpu_start + block_size(pcpu_size[0]); |
486 | int cpu; | ||
479 | 487 | ||
480 | /* First entry is core kernel percpu data. */ | 488 | /* First entry is core kernel percpu data. */ |
481 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { | 489 | for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { |
@@ -487,6 +495,10 @@ static void percpu_modfree(void *freeme) | |||
487 | BUG(); | 495 | BUG(); |
488 | 496 | ||
489 | free: | 497 | free: |
498 | /* remove the per-cpu scanning areas */ | ||
499 | for_each_possible_cpu(cpu) | ||
500 | kmemleak_free(freeme + per_cpu_offset(cpu)); | ||
501 | |||
490 | /* Merge with previous? */ | 502 | /* Merge with previous? */ |
491 | if (pcpu_size[i-1] >= 0) { | 503 | if (pcpu_size[i-1] >= 0) { |
492 | pcpu_size[i-1] += pcpu_size[i]; | 504 | pcpu_size[i-1] += pcpu_size[i]; |
@@ -1879,6 +1891,36 @@ static void *module_alloc_update_bounds(unsigned long size) | |||
1879 | return ret; | 1891 | return ret; |
1880 | } | 1892 | } |
1881 | 1893 | ||
1894 | #ifdef CONFIG_DEBUG_KMEMLEAK | ||
1895 | static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, | ||
1896 | Elf_Shdr *sechdrs, char *secstrings) | ||
1897 | { | ||
1898 | unsigned int i; | ||
1899 | |||
1900 | /* only scan the sections containing data */ | ||
1901 | kmemleak_scan_area(mod->module_core, (unsigned long)mod - | ||
1902 | (unsigned long)mod->module_core, | ||
1903 | sizeof(struct module), GFP_KERNEL); | ||
1904 | |||
1905 | for (i = 1; i < hdr->e_shnum; i++) { | ||
1906 | if (!(sechdrs[i].sh_flags & SHF_ALLOC)) | ||
1907 | continue; | ||
1908 | if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0 | ||
1909 | && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) | ||
1910 | continue; | ||
1911 | |||
1912 | kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - | ||
1913 | (unsigned long)mod->module_core, | ||
1914 | sechdrs[i].sh_size, GFP_KERNEL); | ||
1915 | } | ||
1916 | } | ||
1917 | #else | ||
1918 | static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, | ||
1919 | Elf_Shdr *sechdrs, char *secstrings) | ||
1920 | { | ||
1921 | } | ||
1922 | #endif | ||
1923 | |||
1882 | /* Allocate and load the module: note that size of section 0 is always | 1924 | /* Allocate and load the module: note that size of section 0 is always |
1883 | zero, and we rely on this for optional sections. */ | 1925 | zero, and we rely on this for optional sections. */ |
1884 | static noinline struct module *load_module(void __user *umod, | 1926 | static noinline struct module *load_module(void __user *umod, |
@@ -2049,6 +2091,12 @@ static noinline struct module *load_module(void __user *umod, | |||
2049 | 2091 | ||
2050 | /* Do the allocs. */ | 2092 | /* Do the allocs. */ |
2051 | ptr = module_alloc_update_bounds(mod->core_size); | 2093 | ptr = module_alloc_update_bounds(mod->core_size); |
2094 | /* | ||
2095 | * The pointer to this block is stored in the module structure | ||
2096 | * which is inside the block. Just mark it as not being a | ||
2097 | * leak. | ||
2098 | */ | ||
2099 | kmemleak_not_leak(ptr); | ||
2052 | if (!ptr) { | 2100 | if (!ptr) { |
2053 | err = -ENOMEM; | 2101 | err = -ENOMEM; |
2054 | goto free_percpu; | 2102 | goto free_percpu; |
@@ -2057,6 +2105,13 @@ static noinline struct module *load_module(void __user *umod, | |||
2057 | mod->module_core = ptr; | 2105 | mod->module_core = ptr; |
2058 | 2106 | ||
2059 | ptr = module_alloc_update_bounds(mod->init_size); | 2107 | ptr = module_alloc_update_bounds(mod->init_size); |
2108 | /* | ||
2109 | * The pointer to this block is stored in the module structure | ||
2110 | * which is inside the block. This block doesn't need to be | ||
2111 | * scanned as it contains data and code that will be freed | ||
2112 | * after the module is initialized. | ||
2113 | */ | ||
2114 | kmemleak_ignore(ptr); | ||
2060 | if (!ptr && mod->init_size) { | 2115 | if (!ptr && mod->init_size) { |
2061 | err = -ENOMEM; | 2116 | err = -ENOMEM; |
2062 | goto free_core; | 2117 | goto free_core; |
@@ -2087,6 +2142,7 @@ static noinline struct module *load_module(void __user *umod, | |||
2087 | } | 2142 | } |
2088 | /* Module has been moved. */ | 2143 | /* Module has been moved. */ |
2089 | mod = (void *)sechdrs[modindex].sh_addr; | 2144 | mod = (void *)sechdrs[modindex].sh_addr; |
2145 | kmemleak_load_module(mod, hdr, sechdrs, secstrings); | ||
2090 | 2146 | ||
2091 | #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) | 2147 | #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) |
2092 | mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), | 2148 | mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), |
diff --git a/kernel/mutex.c b/kernel/mutex.c index e5cc0cd28d54..947b3ad551f8 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); | |||
89 | * | 89 | * |
90 | * This function is similar to (but not equivalent to) down(). | 90 | * This function is similar to (but not equivalent to) down(). |
91 | */ | 91 | */ |
92 | void inline __sched mutex_lock(struct mutex *lock) | 92 | void __sched mutex_lock(struct mutex *lock) |
93 | { | 93 | { |
94 | might_sleep(); | 94 | might_sleep(); |
95 | /* | 95 | /* |
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c new file mode 100644 index 000000000000..ef5d8a5b2453 --- /dev/null +++ b/kernel/perf_counter.c | |||
@@ -0,0 +1,4260 @@ | |||
1 | /* | ||
2 | * Performance counter core code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
7 | * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> | ||
8 | * | ||
9 | * For licensing details see kernel-base/COPYING | ||
10 | */ | ||
11 | |||
12 | #include <linux/fs.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <linux/cpu.h> | ||
15 | #include <linux/smp.h> | ||
16 | #include <linux/file.h> | ||
17 | #include <linux/poll.h> | ||
18 | #include <linux/sysfs.h> | ||
19 | #include <linux/dcache.h> | ||
20 | #include <linux/percpu.h> | ||
21 | #include <linux/ptrace.h> | ||
22 | #include <linux/vmstat.h> | ||
23 | #include <linux/hardirq.h> | ||
24 | #include <linux/rculist.h> | ||
25 | #include <linux/uaccess.h> | ||
26 | #include <linux/syscalls.h> | ||
27 | #include <linux/anon_inodes.h> | ||
28 | #include <linux/kernel_stat.h> | ||
29 | #include <linux/perf_counter.h> | ||
30 | |||
31 | #include <asm/irq_regs.h> | ||
32 | |||
33 | /* | ||
34 | * Each CPU has a list of per CPU counters: | ||
35 | */ | ||
36 | DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); | ||
37 | |||
38 | int perf_max_counters __read_mostly = 1; | ||
39 | static int perf_reserved_percpu __read_mostly; | ||
40 | static int perf_overcommit __read_mostly = 1; | ||
41 | |||
42 | static atomic_t nr_counters __read_mostly; | ||
43 | static atomic_t nr_mmap_counters __read_mostly; | ||
44 | static atomic_t nr_comm_counters __read_mostly; | ||
45 | |||
46 | /* | ||
47 | * perf counter paranoia level: | ||
48 | * 0 - not paranoid | ||
49 | * 1 - disallow cpu counters to unpriv | ||
50 | * 2 - disallow kernel profiling to unpriv | ||
51 | */ | ||
52 | int sysctl_perf_counter_paranoid __read_mostly; | ||
53 | |||
54 | static inline bool perf_paranoid_cpu(void) | ||
55 | { | ||
56 | return sysctl_perf_counter_paranoid > 0; | ||
57 | } | ||
58 | |||
59 | static inline bool perf_paranoid_kernel(void) | ||
60 | { | ||
61 | return sysctl_perf_counter_paranoid > 1; | ||
62 | } | ||
63 | |||
64 | int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ | ||
65 | |||
66 | /* | ||
67 | * max perf counter sample rate | ||
68 | */ | ||
69 | int sysctl_perf_counter_sample_rate __read_mostly = 100000; | ||
70 | |||
71 | static atomic64_t perf_counter_id; | ||
72 | |||
73 | /* | ||
74 | * Lock for (sysadmin-configurable) counter reservations: | ||
75 | */ | ||
76 | static DEFINE_SPINLOCK(perf_resource_lock); | ||
77 | |||
78 | /* | ||
79 | * Architecture provided APIs - weak aliases: | ||
80 | */ | ||
81 | extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
82 | { | ||
83 | return NULL; | ||
84 | } | ||
85 | |||
86 | void __weak hw_perf_disable(void) { barrier(); } | ||
87 | void __weak hw_perf_enable(void) { barrier(); } | ||
88 | |||
89 | void __weak hw_perf_counter_setup(int cpu) { barrier(); } | ||
90 | |||
91 | int __weak | ||
92 | hw_perf_group_sched_in(struct perf_counter *group_leader, | ||
93 | struct perf_cpu_context *cpuctx, | ||
94 | struct perf_counter_context *ctx, int cpu) | ||
95 | { | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | void __weak perf_counter_print_debug(void) { } | ||
100 | |||
101 | static DEFINE_PER_CPU(int, disable_count); | ||
102 | |||
103 | void __perf_disable(void) | ||
104 | { | ||
105 | __get_cpu_var(disable_count)++; | ||
106 | } | ||
107 | |||
108 | bool __perf_enable(void) | ||
109 | { | ||
110 | return !--__get_cpu_var(disable_count); | ||
111 | } | ||
112 | |||
113 | void perf_disable(void) | ||
114 | { | ||
115 | __perf_disable(); | ||
116 | hw_perf_disable(); | ||
117 | } | ||
118 | |||
119 | void perf_enable(void) | ||
120 | { | ||
121 | if (__perf_enable()) | ||
122 | hw_perf_enable(); | ||
123 | } | ||
124 | |||
125 | static void get_ctx(struct perf_counter_context *ctx) | ||
126 | { | ||
127 | atomic_inc(&ctx->refcount); | ||
128 | } | ||
129 | |||
130 | static void free_ctx(struct rcu_head *head) | ||
131 | { | ||
132 | struct perf_counter_context *ctx; | ||
133 | |||
134 | ctx = container_of(head, struct perf_counter_context, rcu_head); | ||
135 | kfree(ctx); | ||
136 | } | ||
137 | |||
138 | static void put_ctx(struct perf_counter_context *ctx) | ||
139 | { | ||
140 | if (atomic_dec_and_test(&ctx->refcount)) { | ||
141 | if (ctx->parent_ctx) | ||
142 | put_ctx(ctx->parent_ctx); | ||
143 | if (ctx->task) | ||
144 | put_task_struct(ctx->task); | ||
145 | call_rcu(&ctx->rcu_head, free_ctx); | ||
146 | } | ||
147 | } | ||
148 | |||
149 | /* | ||
150 | * Get the perf_counter_context for a task and lock it. | ||
151 | * This has to cope with the fact that until it is locked, | |
152 | * the context could get moved to another task. | ||
153 | */ | ||
154 | static struct perf_counter_context * | ||
155 | perf_lock_task_context(struct task_struct *task, unsigned long *flags) | ||
156 | { | ||
157 | struct perf_counter_context *ctx; | ||
158 | |||
159 | rcu_read_lock(); | ||
160 | retry: | ||
161 | ctx = rcu_dereference(task->perf_counter_ctxp); | ||
162 | if (ctx) { | ||
163 | /* | ||
164 | * If this context is a clone of another, it might | ||
165 | * get swapped for another underneath us by | ||
166 | * perf_counter_task_sched_out, though the | ||
167 | * rcu_read_lock() protects us from any context | ||
168 | * getting freed. Lock the context and check if it | ||
169 | * got swapped before we could get the lock, and retry | ||
170 | * if so. If we locked the right context, then it | ||
171 | * can't get swapped on us any more. | ||
172 | */ | ||
173 | spin_lock_irqsave(&ctx->lock, *flags); | ||
174 | if (ctx != rcu_dereference(task->perf_counter_ctxp)) { | ||
175 | spin_unlock_irqrestore(&ctx->lock, *flags); | ||
176 | goto retry; | ||
177 | } | ||
178 | } | ||
179 | rcu_read_unlock(); | ||
180 | return ctx; | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * Get the context for a task and increment its pin_count so it | ||
185 | * can't get swapped to another task. This also increments its | ||
186 | * reference count so that the context can't get freed. | ||
187 | */ | ||
188 | static struct perf_counter_context *perf_pin_task_context(struct task_struct *task) | ||
189 | { | ||
190 | struct perf_counter_context *ctx; | ||
191 | unsigned long flags; | ||
192 | |||
193 | ctx = perf_lock_task_context(task, &flags); | ||
194 | if (ctx) { | ||
195 | ++ctx->pin_count; | ||
196 | get_ctx(ctx); | ||
197 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
198 | } | ||
199 | return ctx; | ||
200 | } | ||
201 | |||
202 | static void perf_unpin_context(struct perf_counter_context *ctx) | ||
203 | { | ||
204 | unsigned long flags; | ||
205 | |||
206 | spin_lock_irqsave(&ctx->lock, flags); | ||
207 | --ctx->pin_count; | ||
208 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
209 | put_ctx(ctx); | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * Add a counter to the lists for its context. | |
214 | * Must be called with ctx->mutex and ctx->lock held. | ||
215 | */ | ||
216 | static void | ||
217 | list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
218 | { | ||
219 | struct perf_counter *group_leader = counter->group_leader; | ||
220 | |||
221 | /* | ||
222 | * Depending on whether it is a standalone or sibling counter, | ||
223 | * add it straight to the context's counter list, or to the group | ||
224 | * leader's sibling list: | ||
225 | */ | ||
226 | if (group_leader == counter) | ||
227 | list_add_tail(&counter->list_entry, &ctx->counter_list); | ||
228 | else { | ||
229 | list_add_tail(&counter->list_entry, &group_leader->sibling_list); | ||
230 | group_leader->nr_siblings++; | ||
231 | } | ||
232 | |||
233 | list_add_rcu(&counter->event_entry, &ctx->event_list); | ||
234 | ctx->nr_counters++; | ||
235 | } | ||
236 | |||
237 | /* | ||
238 | * Remove a counter from the lists for its context. | ||
239 | * Must be called with ctx->mutex and ctx->lock held. | ||
240 | */ | ||
241 | static void | ||
242 | list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) | ||
243 | { | ||
244 | struct perf_counter *sibling, *tmp; | ||
245 | |||
246 | if (list_empty(&counter->list_entry)) | ||
247 | return; | ||
248 | ctx->nr_counters--; | ||
249 | |||
250 | list_del_init(&counter->list_entry); | ||
251 | list_del_rcu(&counter->event_entry); | ||
252 | |||
253 | if (counter->group_leader != counter) | ||
254 | counter->group_leader->nr_siblings--; | ||
255 | |||
256 | /* | ||
257 | * If this was a group counter with sibling counters then | ||
258 | * upgrade the siblings to singleton counters by adding them | ||
259 | * to the context list directly: | ||
260 | */ | ||
261 | list_for_each_entry_safe(sibling, tmp, | ||
262 | &counter->sibling_list, list_entry) { | ||
263 | |||
264 | list_move_tail(&sibling->list_entry, &ctx->counter_list); | ||
265 | sibling->group_leader = sibling; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | static void | ||
270 | counter_sched_out(struct perf_counter *counter, | ||
271 | struct perf_cpu_context *cpuctx, | ||
272 | struct perf_counter_context *ctx) | ||
273 | { | ||
274 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
275 | return; | ||
276 | |||
277 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
278 | counter->tstamp_stopped = ctx->time; | ||
279 | counter->pmu->disable(counter); | ||
280 | counter->oncpu = -1; | ||
281 | |||
282 | if (!is_software_counter(counter)) | ||
283 | cpuctx->active_oncpu--; | ||
284 | ctx->nr_active--; | ||
285 | if (counter->attr.exclusive || !cpuctx->active_oncpu) | ||
286 | cpuctx->exclusive = 0; | ||
287 | } | ||
288 | |||
289 | static void | ||
290 | group_sched_out(struct perf_counter *group_counter, | ||
291 | struct perf_cpu_context *cpuctx, | ||
292 | struct perf_counter_context *ctx) | ||
293 | { | ||
294 | struct perf_counter *counter; | ||
295 | |||
296 | if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
297 | return; | ||
298 | |||
299 | counter_sched_out(group_counter, cpuctx, ctx); | ||
300 | |||
301 | /* | ||
302 | * Schedule out siblings (if any): | ||
303 | */ | ||
304 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) | ||
305 | counter_sched_out(counter, cpuctx, ctx); | ||
306 | |||
307 | if (group_counter->attr.exclusive) | ||
308 | cpuctx->exclusive = 0; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * Cross CPU call to remove a performance counter | ||
313 | * | ||
314 | * We disable the counter on the hardware level first. After that we | ||
315 | * remove it from the context list. | ||
316 | */ | ||
317 | static void __perf_counter_remove_from_context(void *info) | ||
318 | { | ||
319 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
320 | struct perf_counter *counter = info; | ||
321 | struct perf_counter_context *ctx = counter->ctx; | ||
322 | |||
323 | /* | ||
324 | * If this is a task context, we need to check whether it is | ||
325 | * the current task context of this cpu. If not it has been | ||
326 | * scheduled out before the smp call arrived. | ||
327 | */ | ||
328 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
329 | return; | ||
330 | |||
331 | spin_lock(&ctx->lock); | ||
332 | /* | ||
333 | * Protect the list operation against NMI by disabling the | ||
334 | * counters on a global level. | ||
335 | */ | ||
336 | perf_disable(); | ||
337 | |||
338 | counter_sched_out(counter, cpuctx, ctx); | ||
339 | |||
340 | list_del_counter(counter, ctx); | ||
341 | |||
342 | if (!ctx->task) { | ||
343 | /* | ||
344 | * Allow more per task counters with respect to the | ||
345 | * reservation: | ||
346 | */ | ||
347 | cpuctx->max_pertask = | ||
348 | min(perf_max_counters - ctx->nr_counters, | ||
349 | perf_max_counters - perf_reserved_percpu); | ||
350 | } | ||
351 | |||
352 | perf_enable(); | ||
353 | spin_unlock(&ctx->lock); | ||
354 | } | ||
355 | |||
356 | |||
357 | /* | ||
358 | * Remove the counter from a task's (or a CPU's) list of counters. | ||
359 | * | ||
360 | * Must be called with ctx->mutex held. | ||
361 | * | ||
362 | * CPU counters are removed with a smp call. For task counters we only | ||
363 | * call when the task is on a CPU. | ||
364 | * | ||
365 | * If counter->ctx is a cloned context, callers must make sure that | ||
366 | * every task struct that counter->ctx->task could possibly point to | ||
367 | * remains valid. This is OK when called from perf_release since | ||
368 | * that only calls us on the top-level context, which can't be a clone. | ||
369 | * When called from perf_counter_exit_task, it's OK because the | ||
370 | * context has been detached from its task. | ||
371 | */ | ||
372 | static void perf_counter_remove_from_context(struct perf_counter *counter) | ||
373 | { | ||
374 | struct perf_counter_context *ctx = counter->ctx; | ||
375 | struct task_struct *task = ctx->task; | ||
376 | |||
377 | if (!task) { | ||
378 | /* | ||
379 | * Per cpu counters are removed via an smp call and | ||
380 | * the removal is always successful. | |
381 | */ | ||
382 | smp_call_function_single(counter->cpu, | ||
383 | __perf_counter_remove_from_context, | ||
384 | counter, 1); | ||
385 | return; | ||
386 | } | ||
387 | |||
388 | retry: | ||
389 | task_oncpu_function_call(task, __perf_counter_remove_from_context, | ||
390 | counter); | ||
391 | |||
392 | spin_lock_irq(&ctx->lock); | ||
393 | /* | ||
394 | * If the context is active we need to retry the smp call. | ||
395 | */ | ||
396 | if (ctx->nr_active && !list_empty(&counter->list_entry)) { | ||
397 | spin_unlock_irq(&ctx->lock); | ||
398 | goto retry; | ||
399 | } | ||
400 | |||
401 | /* | ||
402 | * The lock prevents that this context is scheduled in so we | ||
403 | * can remove the counter safely, if the call above did not | ||
404 | * succeed. | ||
405 | */ | ||
406 | if (!list_empty(&counter->list_entry)) { | ||
407 | list_del_counter(counter, ctx); | ||
408 | } | ||
409 | spin_unlock_irq(&ctx->lock); | ||
410 | } | ||
411 | |||
412 | static inline u64 perf_clock(void) | ||
413 | { | ||
414 | return cpu_clock(smp_processor_id()); | ||
415 | } | ||
416 | |||
417 | /* | ||
418 | * Update the record of the current time in a context. | ||
419 | */ | ||
420 | static void update_context_time(struct perf_counter_context *ctx) | ||
421 | { | ||
422 | u64 now = perf_clock(); | ||
423 | |||
424 | ctx->time += now - ctx->timestamp; | ||
425 | ctx->timestamp = now; | ||
426 | } | ||
427 | |||
428 | /* | ||
429 | * Update the total_time_enabled and total_time_running fields for a counter. | ||
430 | */ | ||
431 | static void update_counter_times(struct perf_counter *counter) | ||
432 | { | ||
433 | struct perf_counter_context *ctx = counter->ctx; | ||
434 | u64 run_end; | ||
435 | |||
436 | if (counter->state < PERF_COUNTER_STATE_INACTIVE) | ||
437 | return; | ||
438 | |||
439 | counter->total_time_enabled = ctx->time - counter->tstamp_enabled; | ||
440 | |||
441 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) | ||
442 | run_end = counter->tstamp_stopped; | ||
443 | else | ||
444 | run_end = ctx->time; | ||
445 | |||
446 | counter->total_time_running = run_end - counter->tstamp_running; | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * Update total_time_enabled and total_time_running for all counters in a group. | ||
451 | */ | ||
452 | static void update_group_times(struct perf_counter *leader) | ||
453 | { | ||
454 | struct perf_counter *counter; | ||
455 | |||
456 | update_counter_times(leader); | ||
457 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
458 | update_counter_times(counter); | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Cross CPU call to disable a performance counter | ||
463 | */ | ||
464 | static void __perf_counter_disable(void *info) | ||
465 | { | ||
466 | struct perf_counter *counter = info; | ||
467 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
468 | struct perf_counter_context *ctx = counter->ctx; | ||
469 | |||
470 | /* | ||
471 | * If this is a per-task counter, need to check whether this | ||
472 | * counter's task is the current task on this cpu. | ||
473 | */ | ||
474 | if (ctx->task && cpuctx->task_ctx != ctx) | ||
475 | return; | ||
476 | |||
477 | spin_lock(&ctx->lock); | ||
478 | |||
479 | /* | ||
480 | * If the counter is on, turn it off. | ||
481 | * If it is in error state, leave it in error state. | ||
482 | */ | ||
483 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { | ||
484 | update_context_time(ctx); | ||
485 | update_counter_times(counter); | ||
486 | if (counter == counter->group_leader) | ||
487 | group_sched_out(counter, cpuctx, ctx); | ||
488 | else | ||
489 | counter_sched_out(counter, cpuctx, ctx); | ||
490 | counter->state = PERF_COUNTER_STATE_OFF; | ||
491 | } | ||
492 | |||
493 | spin_unlock(&ctx->lock); | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * Disable a counter. | ||
498 | * | ||
499 | * If counter->ctx is a cloned context, callers must make sure that | ||
500 | * every task struct that counter->ctx->task could possibly point to | ||
501 | * remains valid. This condition is satisfied when called through | |
502 | * perf_counter_for_each_child or perf_counter_for_each because they | ||
503 | * hold the top-level counter's child_mutex, so any descendant that | ||
504 | * goes to exit will block in sync_child_counter. | ||
505 | * When called from perf_pending_counter it's OK because counter->ctx | ||
506 | * is the current context on this CPU and preemption is disabled, | ||
507 | * hence we can't get into perf_counter_task_sched_out for this context. | ||
508 | */ | ||
509 | static void perf_counter_disable(struct perf_counter *counter) | ||
510 | { | ||
511 | struct perf_counter_context *ctx = counter->ctx; | ||
512 | struct task_struct *task = ctx->task; | ||
513 | |||
514 | if (!task) { | ||
515 | /* | ||
516 | * Disable the counter on the cpu that it's on | ||
517 | */ | ||
518 | smp_call_function_single(counter->cpu, __perf_counter_disable, | ||
519 | counter, 1); | ||
520 | return; | ||
521 | } | ||
522 | |||
523 | retry: | ||
524 | task_oncpu_function_call(task, __perf_counter_disable, counter); | ||
525 | |||
526 | spin_lock_irq(&ctx->lock); | ||
527 | /* | ||
528 | * If the counter is still active, we need to retry the cross-call. | ||
529 | */ | ||
530 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | ||
531 | spin_unlock_irq(&ctx->lock); | ||
532 | goto retry; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * Since we have the lock this context can't be scheduled | ||
537 | * in, so we can change the state safely. | ||
538 | */ | ||
539 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
540 | update_counter_times(counter); | ||
541 | counter->state = PERF_COUNTER_STATE_OFF; | ||
542 | } | ||
543 | |||
544 | spin_unlock_irq(&ctx->lock); | ||
545 | } | ||
546 | |||
547 | static int | ||
548 | counter_sched_in(struct perf_counter *counter, | ||
549 | struct perf_cpu_context *cpuctx, | ||
550 | struct perf_counter_context *ctx, | ||
551 | int cpu) | ||
552 | { | ||
553 | if (counter->state <= PERF_COUNTER_STATE_OFF) | ||
554 | return 0; | ||
555 | |||
556 | counter->state = PERF_COUNTER_STATE_ACTIVE; | ||
557 | counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ | ||
558 | /* | ||
559 | * The new state must be visible before we turn it on in the hardware: | ||
560 | */ | ||
561 | smp_wmb(); | ||
562 | |||
563 | if (counter->pmu->enable(counter)) { | ||
564 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
565 | counter->oncpu = -1; | ||
566 | return -EAGAIN; | ||
567 | } | ||
568 | |||
569 | counter->tstamp_running += ctx->time - counter->tstamp_stopped; | ||
570 | |||
571 | if (!is_software_counter(counter)) | ||
572 | cpuctx->active_oncpu++; | ||
573 | ctx->nr_active++; | ||
574 | |||
575 | if (counter->attr.exclusive) | ||
576 | cpuctx->exclusive = 1; | ||
577 | |||
578 | return 0; | ||
579 | } | ||
580 | |||
581 | static int | ||
582 | group_sched_in(struct perf_counter *group_counter, | ||
583 | struct perf_cpu_context *cpuctx, | ||
584 | struct perf_counter_context *ctx, | ||
585 | int cpu) | ||
586 | { | ||
587 | struct perf_counter *counter, *partial_group; | ||
588 | int ret; | ||
589 | |||
590 | if (group_counter->state == PERF_COUNTER_STATE_OFF) | ||
591 | return 0; | ||
592 | |||
593 | ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); | ||
594 | if (ret) | ||
595 | return ret < 0 ? ret : 0; | ||
596 | |||
597 | if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) | ||
598 | return -EAGAIN; | ||
599 | |||
600 | /* | ||
601 | * Schedule in siblings as one group (if any): | ||
602 | */ | ||
603 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { | ||
604 | if (counter_sched_in(counter, cpuctx, ctx, cpu)) { | ||
605 | partial_group = counter; | ||
606 | goto group_error; | ||
607 | } | ||
608 | } | ||
609 | |||
610 | return 0; | ||
611 | |||
612 | group_error: | ||
613 | /* | ||
614 | * Groups can be scheduled in as one unit only, so undo any | ||
615 | * partial group before returning: | ||
616 | */ | ||
617 | list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { | ||
618 | if (counter == partial_group) | ||
619 | break; | ||
620 | counter_sched_out(counter, cpuctx, ctx); | ||
621 | } | ||
622 | counter_sched_out(group_counter, cpuctx, ctx); | ||
623 | |||
624 | return -EAGAIN; | ||
625 | } | ||
626 | |||
627 | /* | ||
628 | * Return 1 for a group consisting entirely of software counters, | ||
629 | * 0 if the group contains any hardware counters. | ||
630 | */ | ||
631 | static int is_software_only_group(struct perf_counter *leader) | ||
632 | { | ||
633 | struct perf_counter *counter; | ||
634 | |||
635 | if (!is_software_counter(leader)) | ||
636 | return 0; | ||
637 | |||
638 | list_for_each_entry(counter, &leader->sibling_list, list_entry) | ||
639 | if (!is_software_counter(counter)) | ||
640 | return 0; | ||
641 | |||
642 | return 1; | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Work out whether we can put this counter group on the CPU now. | ||
647 | */ | ||
648 | static int group_can_go_on(struct perf_counter *counter, | ||
649 | struct perf_cpu_context *cpuctx, | ||
650 | int can_add_hw) | ||
651 | { | ||
652 | /* | ||
653 | * Groups consisting entirely of software counters can always go on. | ||
654 | */ | ||
655 | if (is_software_only_group(counter)) | ||
656 | return 1; | ||
657 | /* | ||
658 | * If an exclusive group is already on, no other hardware | ||
659 | * counters can go on. | ||
660 | */ | ||
661 | if (cpuctx->exclusive) | ||
662 | return 0; | ||
663 | /* | ||
664 | * If this group is exclusive and there are already | ||
665 | * counters on the CPU, it can't go on. | ||
666 | */ | ||
667 | if (counter->attr.exclusive && cpuctx->active_oncpu) | ||
668 | return 0; | ||
669 | /* | ||
670 | * Otherwise, try to add it if all previous groups were able | ||
671 | * to go on. | ||
672 | */ | ||
673 | return can_add_hw; | ||
674 | } | ||
675 | |||
676 | static void add_counter_to_ctx(struct perf_counter *counter, | ||
677 | struct perf_counter_context *ctx) | ||
678 | { | ||
679 | list_add_counter(counter, ctx); | ||
680 | counter->tstamp_enabled = ctx->time; | ||
681 | counter->tstamp_running = ctx->time; | ||
682 | counter->tstamp_stopped = ctx->time; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Cross CPU call to install and enable a performance counter | ||
687 | * | ||
688 | * Must be called with ctx->mutex held | ||
689 | */ | ||
690 | static void __perf_install_in_context(void *info) | ||
691 | { | ||
692 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
693 | struct perf_counter *counter = info; | ||
694 | struct perf_counter_context *ctx = counter->ctx; | ||
695 | struct perf_counter *leader = counter->group_leader; | ||
696 | int cpu = smp_processor_id(); | ||
697 | int err; | ||
698 | |||
699 | /* | ||
700 | * If this is a task context, we need to check whether it is | ||
701 | * the current task context of this cpu. If not it has been | ||
702 | * scheduled out before the smp call arrived. | ||
703 | * Or possibly this is the right context but it isn't | ||
704 | * on this cpu because it had no counters. | ||
705 | */ | ||
706 | if (ctx->task && cpuctx->task_ctx != ctx) { | ||
707 | if (cpuctx->task_ctx || ctx->task != current) | ||
708 | return; | ||
709 | cpuctx->task_ctx = ctx; | ||
710 | } | ||
711 | |||
712 | spin_lock(&ctx->lock); | ||
713 | ctx->is_active = 1; | ||
714 | update_context_time(ctx); | ||
715 | |||
716 | /* | ||
717 | * Protect the list operation against NMI by disabling the | ||
718 | * counters on a global level. NOP for non NMI based counters. | ||
719 | */ | ||
720 | perf_disable(); | ||
721 | |||
722 | add_counter_to_ctx(counter, ctx); | ||
723 | |||
724 | /* | ||
725 | * Don't put the counter on if it is disabled or if | ||
726 | * it is in a group and the group isn't on. | ||
727 | */ | ||
728 | if (counter->state != PERF_COUNTER_STATE_INACTIVE || | ||
729 | (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) | ||
730 | goto unlock; | ||
731 | |||
732 | /* | ||
733 | * An exclusive counter can't go on if there are already active | ||
734 | * hardware counters, and no hardware counter can go on if there | ||
735 | * is already an exclusive counter on. | ||
736 | */ | ||
737 | if (!group_can_go_on(counter, cpuctx, 1)) | ||
738 | err = -EEXIST; | ||
739 | else | ||
740 | err = counter_sched_in(counter, cpuctx, ctx, cpu); | ||
741 | |||
742 | if (err) { | ||
743 | /* | ||
744 | * This counter couldn't go on. If it is in a group | ||
745 | * then we have to pull the whole group off. | ||
746 | * If the counter group is pinned then put it in error state. | ||
747 | */ | ||
748 | if (leader != counter) | ||
749 | group_sched_out(leader, cpuctx, ctx); | ||
750 | if (leader->attr.pinned) { | ||
751 | update_group_times(leader); | ||
752 | leader->state = PERF_COUNTER_STATE_ERROR; | ||
753 | } | ||
754 | } | ||
755 | |||
756 | if (!err && !ctx->task && cpuctx->max_pertask) | ||
757 | cpuctx->max_pertask--; | ||
758 | |||
759 | unlock: | ||
760 | perf_enable(); | ||
761 | |||
762 | spin_unlock(&ctx->lock); | ||
763 | } | ||
764 | |||
765 | /* | ||
766 | * Attach a performance counter to a context | ||
767 | * | ||
768 | * First we add the counter to the list with the hardware enable bit | ||
769 | * in counter->hw_config cleared. | ||
770 | * | ||
771 | * If the counter is attached to a task which is on a CPU we use a smp | ||
772 | * call to enable it in the task context. The task might have been | ||
773 | * scheduled away, but we check this in the smp call again. | ||
774 | * | ||
775 | * Must be called with ctx->mutex held. | ||
776 | */ | ||
777 | static void | ||
778 | perf_install_in_context(struct perf_counter_context *ctx, | ||
779 | struct perf_counter *counter, | ||
780 | int cpu) | ||
781 | { | ||
782 | struct task_struct *task = ctx->task; | ||
783 | |||
784 | if (!task) { | ||
785 | /* | ||
786 | * Per cpu counters are installed via an smp call and | ||
787 | * the install is always sucessful. | ||
788 | */ | ||
789 | smp_call_function_single(cpu, __perf_install_in_context, | ||
790 | counter, 1); | ||
791 | return; | ||
792 | } | ||
793 | |||
794 | retry: | ||
795 | task_oncpu_function_call(task, __perf_install_in_context, | ||
796 | counter); | ||
797 | |||
798 | spin_lock_irq(&ctx->lock); | ||
799 | /* | ||
800 | * we need to retry the smp call. | ||
801 | */ | ||
802 | if (ctx->is_active && list_empty(&counter->list_entry)) { | ||
803 | spin_unlock_irq(&ctx->lock); | ||
804 | goto retry; | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * The lock prevents that this context is scheduled in so we | ||
809 | * can add the counter safely, if it the call above did not | ||
810 | * succeed. | ||
811 | */ | ||
812 | if (list_empty(&counter->list_entry)) | ||
813 | add_counter_to_ctx(counter, ctx); | ||
814 | spin_unlock_irq(&ctx->lock); | ||
815 | } | ||
816 | |||
817 | /* | ||
818 | * Cross CPU call to enable a performance counter | ||
819 | */ | ||
820 | static void __perf_counter_enable(void *info) | ||
821 | { | ||
822 | struct perf_counter *counter = info; | ||
823 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
824 | struct perf_counter_context *ctx = counter->ctx; | ||
825 | struct perf_counter *leader = counter->group_leader; | ||
826 | int err; | ||
827 | |||
828 | /* | ||
829 | * If this is a per-task counter, need to check whether this | ||
830 | * counter's task is the current task on this cpu. | ||
831 | */ | ||
832 | if (ctx->task && cpuctx->task_ctx != ctx) { | ||
833 | if (cpuctx->task_ctx || ctx->task != current) | ||
834 | return; | ||
835 | cpuctx->task_ctx = ctx; | ||
836 | } | ||
837 | |||
838 | spin_lock(&ctx->lock); | ||
839 | ctx->is_active = 1; | ||
840 | update_context_time(ctx); | ||
841 | |||
842 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
843 | goto unlock; | ||
844 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
845 | counter->tstamp_enabled = ctx->time - counter->total_time_enabled; | ||
846 | |||
847 | /* | ||
848 | * If the counter is in a group and isn't the group leader, | ||
849 | * then don't put it on unless the group is on. | ||
850 | */ | ||
851 | if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) | ||
852 | goto unlock; | ||
853 | |||
854 | if (!group_can_go_on(counter, cpuctx, 1)) { | ||
855 | err = -EEXIST; | ||
856 | } else { | ||
857 | perf_disable(); | ||
858 | if (counter == leader) | ||
859 | err = group_sched_in(counter, cpuctx, ctx, | ||
860 | smp_processor_id()); | ||
861 | else | ||
862 | err = counter_sched_in(counter, cpuctx, ctx, | ||
863 | smp_processor_id()); | ||
864 | perf_enable(); | ||
865 | } | ||
866 | |||
867 | if (err) { | ||
868 | /* | ||
869 | * If this counter can't go on and it's part of a | ||
870 | * group, then the whole group has to come off. | ||
871 | */ | ||
872 | if (leader != counter) | ||
873 | group_sched_out(leader, cpuctx, ctx); | ||
874 | if (leader->attr.pinned) { | ||
875 | update_group_times(leader); | ||
876 | leader->state = PERF_COUNTER_STATE_ERROR; | ||
877 | } | ||
878 | } | ||
879 | |||
880 | unlock: | ||
881 | spin_unlock(&ctx->lock); | ||
882 | } | ||
883 | |||
884 | /* | ||
885 | * Enable a counter. | ||
886 | * | ||
887 | * If counter->ctx is a cloned context, callers must make sure that | ||
888 | * every task struct that counter->ctx->task could possibly point to | ||
889 | * remains valid. This condition is satisfied when called through | ||
890 | * perf_counter_for_each_child or perf_counter_for_each as described | ||
891 | * for perf_counter_disable. | ||
892 | */ | ||
893 | static void perf_counter_enable(struct perf_counter *counter) | ||
894 | { | ||
895 | struct perf_counter_context *ctx = counter->ctx; | ||
896 | struct task_struct *task = ctx->task; | ||
897 | |||
898 | if (!task) { | ||
899 | /* | ||
900 | * Enable the counter on the cpu that it's on | ||
901 | */ | ||
902 | smp_call_function_single(counter->cpu, __perf_counter_enable, | ||
903 | counter, 1); | ||
904 | return; | ||
905 | } | ||
906 | |||
907 | spin_lock_irq(&ctx->lock); | ||
908 | if (counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
909 | goto out; | ||
910 | |||
911 | /* | ||
912 | * If the counter is in error state, clear that first. | ||
913 | * That way, if we see the counter in error state below, we | ||
914 | * know that it has gone back into error state, as distinct | ||
915 | * from the task having been scheduled away before the | ||
916 | * cross-call arrived. | ||
917 | */ | ||
918 | if (counter->state == PERF_COUNTER_STATE_ERROR) | ||
919 | counter->state = PERF_COUNTER_STATE_OFF; | ||
920 | |||
921 | retry: | ||
922 | spin_unlock_irq(&ctx->lock); | ||
923 | task_oncpu_function_call(task, __perf_counter_enable, counter); | ||
924 | |||
925 | spin_lock_irq(&ctx->lock); | ||
926 | |||
927 | /* | ||
928 | * If the context is active and the counter is still off, | ||
929 | * we need to retry the cross-call. | ||
930 | */ | ||
931 | if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) | ||
932 | goto retry; | ||
933 | |||
934 | /* | ||
935 | * Since we have the lock this context can't be scheduled | ||
936 | * in, so we can change the state safely. | ||
937 | */ | ||
938 | if (counter->state == PERF_COUNTER_STATE_OFF) { | ||
939 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
940 | counter->tstamp_enabled = | ||
941 | ctx->time - counter->total_time_enabled; | ||
942 | } | ||
943 | out: | ||
944 | spin_unlock_irq(&ctx->lock); | ||
945 | } | ||
946 | |||
947 | static int perf_counter_refresh(struct perf_counter *counter, int refresh) | ||
948 | { | ||
949 | /* | ||
950 | * not supported on inherited counters | ||
951 | */ | ||
952 | if (counter->attr.inherit) | ||
953 | return -EINVAL; | ||
954 | |||
955 | atomic_add(refresh, &counter->event_limit); | ||
956 | perf_counter_enable(counter); | ||
957 | |||
958 | return 0; | ||
959 | } | ||
960 | |||
961 | void __perf_counter_sched_out(struct perf_counter_context *ctx, | ||
962 | struct perf_cpu_context *cpuctx) | ||
963 | { | ||
964 | struct perf_counter *counter; | ||
965 | |||
966 | spin_lock(&ctx->lock); | ||
967 | ctx->is_active = 0; | ||
968 | if (likely(!ctx->nr_counters)) | ||
969 | goto out; | ||
970 | update_context_time(ctx); | ||
971 | |||
972 | perf_disable(); | ||
973 | if (ctx->nr_active) { | ||
974 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
975 | if (counter != counter->group_leader) | ||
976 | counter_sched_out(counter, cpuctx, ctx); | ||
977 | else | ||
978 | group_sched_out(counter, cpuctx, ctx); | ||
979 | } | ||
980 | } | ||
981 | perf_enable(); | ||
982 | out: | ||
983 | spin_unlock(&ctx->lock); | ||
984 | } | ||
985 | |||
986 | /* | ||
987 | * Test whether two contexts are equivalent, i.e. whether they | ||
988 | * have both been cloned from the same version of the same context | ||
989 | * and they both have the same number of enabled counters. | ||
990 | * If the number of enabled counters is the same, then the set | ||
991 | * of enabled counters should be the same, because these are both | ||
992 | * inherited contexts, therefore we can't access individual counters | ||
993 | * in them directly with an fd; we can only enable/disable all | ||
994 | * counters via prctl, or enable/disable all counters in a family | ||
995 | * via ioctl, which will have the same effect on both contexts. | ||
996 | */ | ||
997 | static int context_equiv(struct perf_counter_context *ctx1, | ||
998 | struct perf_counter_context *ctx2) | ||
999 | { | ||
1000 | return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx | ||
1001 | && ctx1->parent_gen == ctx2->parent_gen | ||
1002 | && !ctx1->pin_count && !ctx2->pin_count; | ||
1003 | } | ||
1004 | |||
1005 | /* | ||
1006 | * Called from scheduler to remove the counters of the current task, | ||
1007 | * with interrupts disabled. | ||
1008 | * | ||
1009 | * We stop each counter and update the counter value in counter->count. | ||
1010 | * | ||
1011 | * This does not protect us against NMI, but disable() | ||
1012 | * sets the disabled bit in the control field of counter _before_ | ||
1013 | * accessing the counter control register. If a NMI hits, then it will | ||
1014 | * not restart the counter. | ||
1015 | */ | ||
1016 | void perf_counter_task_sched_out(struct task_struct *task, | ||
1017 | struct task_struct *next, int cpu) | ||
1018 | { | ||
1019 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1020 | struct perf_counter_context *ctx = task->perf_counter_ctxp; | ||
1021 | struct perf_counter_context *next_ctx; | ||
1022 | struct perf_counter_context *parent; | ||
1023 | struct pt_regs *regs; | ||
1024 | int do_switch = 1; | ||
1025 | |||
1026 | regs = task_pt_regs(task); | ||
1027 | perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); | ||
1028 | |||
1029 | if (likely(!ctx || !cpuctx->task_ctx)) | ||
1030 | return; | ||
1031 | |||
1032 | update_context_time(ctx); | ||
1033 | |||
1034 | rcu_read_lock(); | ||
1035 | parent = rcu_dereference(ctx->parent_ctx); | ||
1036 | next_ctx = next->perf_counter_ctxp; | ||
1037 | if (parent && next_ctx && | ||
1038 | rcu_dereference(next_ctx->parent_ctx) == parent) { | ||
1039 | /* | ||
1040 | * Looks like the two contexts are clones, so we might be | ||
1041 | * able to optimize the context switch. We lock both | ||
1042 | * contexts and check that they are clones under the | ||
1043 | * lock (including re-checking that neither has been | ||
1044 | * uncloned in the meantime). It doesn't matter which | ||
1045 | * order we take the locks because no other cpu could | ||
1046 | * be trying to lock both of these tasks. | ||
1047 | */ | ||
1048 | spin_lock(&ctx->lock); | ||
1049 | spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); | ||
1050 | if (context_equiv(ctx, next_ctx)) { | ||
1051 | /* | ||
1052 | * XXX do we need a memory barrier of sorts | ||
1053 | * wrt to rcu_dereference() of perf_counter_ctxp | ||
1054 | */ | ||
1055 | task->perf_counter_ctxp = next_ctx; | ||
1056 | next->perf_counter_ctxp = ctx; | ||
1057 | ctx->task = next; | ||
1058 | next_ctx->task = task; | ||
1059 | do_switch = 0; | ||
1060 | } | ||
1061 | spin_unlock(&next_ctx->lock); | ||
1062 | spin_unlock(&ctx->lock); | ||
1063 | } | ||
1064 | rcu_read_unlock(); | ||
1065 | |||
1066 | if (do_switch) { | ||
1067 | __perf_counter_sched_out(ctx, cpuctx); | ||
1068 | cpuctx->task_ctx = NULL; | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | /* | ||
1073 | * Called with IRQs disabled | ||
1074 | */ | ||
1075 | static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) | ||
1076 | { | ||
1077 | struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); | ||
1078 | |||
1079 | if (!cpuctx->task_ctx) | ||
1080 | return; | ||
1081 | |||
1082 | if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) | ||
1083 | return; | ||
1084 | |||
1085 | __perf_counter_sched_out(ctx, cpuctx); | ||
1086 | cpuctx->task_ctx = NULL; | ||
1087 | } | ||
1088 | |||
1089 | /* | ||
1090 | * Called with IRQs disabled | ||
1091 | */ | ||
1092 | static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) | ||
1093 | { | ||
1094 | __perf_counter_sched_out(&cpuctx->ctx, cpuctx); | ||
1095 | } | ||
1096 | |||
1097 | static void | ||
1098 | __perf_counter_sched_in(struct perf_counter_context *ctx, | ||
1099 | struct perf_cpu_context *cpuctx, int cpu) | ||
1100 | { | ||
1101 | struct perf_counter *counter; | ||
1102 | int can_add_hw = 1; | ||
1103 | |||
1104 | spin_lock(&ctx->lock); | ||
1105 | ctx->is_active = 1; | ||
1106 | if (likely(!ctx->nr_counters)) | ||
1107 | goto out; | ||
1108 | |||
1109 | ctx->timestamp = perf_clock(); | ||
1110 | |||
1111 | perf_disable(); | ||
1112 | |||
1113 | /* | ||
1114 | * First go through the list and put on any pinned groups | ||
1115 | * in order to give them the best chance of going on. | ||
1116 | */ | ||
1117 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1118 | if (counter->state <= PERF_COUNTER_STATE_OFF || | ||
1119 | !counter->attr.pinned) | ||
1120 | continue; | ||
1121 | if (counter->cpu != -1 && counter->cpu != cpu) | ||
1122 | continue; | ||
1123 | |||
1124 | if (counter != counter->group_leader) | ||
1125 | counter_sched_in(counter, cpuctx, ctx, cpu); | ||
1126 | else { | ||
1127 | if (group_can_go_on(counter, cpuctx, 1)) | ||
1128 | group_sched_in(counter, cpuctx, ctx, cpu); | ||
1129 | } | ||
1130 | |||
1131 | /* | ||
1132 | * If this pinned group hasn't been scheduled, | ||
1133 | * put it in error state. | ||
1134 | */ | ||
1135 | if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
1136 | update_group_times(counter); | ||
1137 | counter->state = PERF_COUNTER_STATE_ERROR; | ||
1138 | } | ||
1139 | } | ||
1140 | |||
1141 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1142 | /* | ||
1143 | * Ignore counters in OFF or ERROR state, and | ||
1144 | * ignore pinned counters since we did them already. | ||
1145 | */ | ||
1146 | if (counter->state <= PERF_COUNTER_STATE_OFF || | ||
1147 | counter->attr.pinned) | ||
1148 | continue; | ||
1149 | |||
1150 | /* | ||
1151 | * Listen to the 'cpu' scheduling filter constraint | ||
1152 | * of counters: | ||
1153 | */ | ||
1154 | if (counter->cpu != -1 && counter->cpu != cpu) | ||
1155 | continue; | ||
1156 | |||
1157 | if (counter != counter->group_leader) { | ||
1158 | if (counter_sched_in(counter, cpuctx, ctx, cpu)) | ||
1159 | can_add_hw = 0; | ||
1160 | } else { | ||
1161 | if (group_can_go_on(counter, cpuctx, can_add_hw)) { | ||
1162 | if (group_sched_in(counter, cpuctx, ctx, cpu)) | ||
1163 | can_add_hw = 0; | ||
1164 | } | ||
1165 | } | ||
1166 | } | ||
1167 | perf_enable(); | ||
1168 | out: | ||
1169 | spin_unlock(&ctx->lock); | ||
1170 | } | ||
1171 | |||
1172 | /* | ||
1173 | * Called from scheduler to add the counters of the current task | ||
1174 | * with interrupts disabled. | ||
1175 | * | ||
1176 | * We restore the counter value and then enable it. | ||
1177 | * | ||
1178 | * This does not protect us against NMI, but enable() | ||
1179 | * sets the enabled bit in the control field of counter _before_ | ||
1180 | * accessing the counter control register. If a NMI hits, then it will | ||
1181 | * keep the counter running. | ||
1182 | */ | ||
1183 | void perf_counter_task_sched_in(struct task_struct *task, int cpu) | ||
1184 | { | ||
1185 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1186 | struct perf_counter_context *ctx = task->perf_counter_ctxp; | ||
1187 | |||
1188 | if (likely(!ctx)) | ||
1189 | return; | ||
1190 | if (cpuctx->task_ctx == ctx) | ||
1191 | return; | ||
1192 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
1193 | cpuctx->task_ctx = ctx; | ||
1194 | } | ||
1195 | |||
1196 | static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) | ||
1197 | { | ||
1198 | struct perf_counter_context *ctx = &cpuctx->ctx; | ||
1199 | |||
1200 | __perf_counter_sched_in(ctx, cpuctx, cpu); | ||
1201 | } | ||
1202 | |||
1203 | #define MAX_INTERRUPTS (~0ULL) | ||
1204 | |||
1205 | static void perf_log_throttle(struct perf_counter *counter, int enable); | ||
1206 | static void perf_log_period(struct perf_counter *counter, u64 period); | ||
1207 | |||
1208 | static void perf_adjust_period(struct perf_counter *counter, u64 events) | ||
1209 | { | ||
1210 | struct hw_perf_counter *hwc = &counter->hw; | ||
1211 | u64 period, sample_period; | ||
1212 | s64 delta; | ||
1213 | |||
1214 | events *= hwc->sample_period; | ||
1215 | period = div64_u64(events, counter->attr.sample_freq); | ||
1216 | |||
1217 | delta = (s64)(period - hwc->sample_period); | ||
1218 | delta = (delta + 7) / 8; /* low pass filter */ | ||
1219 | |||
1220 | sample_period = hwc->sample_period + delta; | ||
1221 | |||
1222 | if (!sample_period) | ||
1223 | sample_period = 1; | ||
1224 | |||
1225 | perf_log_period(counter, sample_period); | ||
1226 | |||
1227 | hwc->sample_period = sample_period; | ||
1228 | } | ||
1229 | |||
1230 | static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) | ||
1231 | { | ||
1232 | struct perf_counter *counter; | ||
1233 | struct hw_perf_counter *hwc; | ||
1234 | u64 interrupts, freq; | ||
1235 | |||
1236 | spin_lock(&ctx->lock); | ||
1237 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1238 | if (counter->state != PERF_COUNTER_STATE_ACTIVE) | ||
1239 | continue; | ||
1240 | |||
1241 | hwc = &counter->hw; | ||
1242 | |||
1243 | interrupts = hwc->interrupts; | ||
1244 | hwc->interrupts = 0; | ||
1245 | |||
1246 | /* | ||
1247 | * unthrottle counters on the tick | ||
1248 | */ | ||
1249 | if (interrupts == MAX_INTERRUPTS) { | ||
1250 | perf_log_throttle(counter, 1); | ||
1251 | counter->pmu->unthrottle(counter); | ||
1252 | interrupts = 2*sysctl_perf_counter_sample_rate/HZ; | ||
1253 | } | ||
1254 | |||
1255 | if (!counter->attr.freq || !counter->attr.sample_freq) | ||
1256 | continue; | ||
1257 | |||
1258 | /* | ||
1259 | * if the specified freq < HZ then we need to skip ticks | ||
1260 | */ | ||
1261 | if (counter->attr.sample_freq < HZ) { | ||
1262 | freq = counter->attr.sample_freq; | ||
1263 | |||
1264 | hwc->freq_count += freq; | ||
1265 | hwc->freq_interrupts += interrupts; | ||
1266 | |||
1267 | if (hwc->freq_count < HZ) | ||
1268 | continue; | ||
1269 | |||
1270 | interrupts = hwc->freq_interrupts; | ||
1271 | hwc->freq_interrupts = 0; | ||
1272 | hwc->freq_count -= HZ; | ||
1273 | } else | ||
1274 | freq = HZ; | ||
1275 | |||
1276 | perf_adjust_period(counter, freq * interrupts); | ||
1277 | |||
1278 | /* | ||
1279 | * In order to avoid being stalled by an (accidental) huge | ||
1280 | * sample period, force reset the sample period if we didn't | ||
1281 | * get any events in this freq period. | ||
1282 | */ | ||
1283 | if (!interrupts) { | ||
1284 | perf_disable(); | ||
1285 | counter->pmu->disable(counter); | ||
1286 | atomic_set(&hwc->period_left, 0); | ||
1287 | counter->pmu->enable(counter); | ||
1288 | perf_enable(); | ||
1289 | } | ||
1290 | } | ||
1291 | spin_unlock(&ctx->lock); | ||
1292 | } | ||
1293 | |||
1294 | /* | ||
1295 | * Round-robin a context's counters: | ||
1296 | */ | ||
1297 | static void rotate_ctx(struct perf_counter_context *ctx) | ||
1298 | { | ||
1299 | struct perf_counter *counter; | ||
1300 | |||
1301 | if (!ctx->nr_counters) | ||
1302 | return; | ||
1303 | |||
1304 | spin_lock(&ctx->lock); | ||
1305 | /* | ||
1306 | * Rotate the first entry last (works just fine for group counters too): | ||
1307 | */ | ||
1308 | perf_disable(); | ||
1309 | list_for_each_entry(counter, &ctx->counter_list, list_entry) { | ||
1310 | list_move_tail(&counter->list_entry, &ctx->counter_list); | ||
1311 | break; | ||
1312 | } | ||
1313 | perf_enable(); | ||
1314 | |||
1315 | spin_unlock(&ctx->lock); | ||
1316 | } | ||
1317 | |||
1318 | void perf_counter_task_tick(struct task_struct *curr, int cpu) | ||
1319 | { | ||
1320 | struct perf_cpu_context *cpuctx; | ||
1321 | struct perf_counter_context *ctx; | ||
1322 | |||
1323 | if (!atomic_read(&nr_counters)) | ||
1324 | return; | ||
1325 | |||
1326 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1327 | ctx = curr->perf_counter_ctxp; | ||
1328 | |||
1329 | perf_ctx_adjust_freq(&cpuctx->ctx); | ||
1330 | if (ctx) | ||
1331 | perf_ctx_adjust_freq(ctx); | ||
1332 | |||
1333 | perf_counter_cpu_sched_out(cpuctx); | ||
1334 | if (ctx) | ||
1335 | __perf_counter_task_sched_out(ctx); | ||
1336 | |||
1337 | rotate_ctx(&cpuctx->ctx); | ||
1338 | if (ctx) | ||
1339 | rotate_ctx(ctx); | ||
1340 | |||
1341 | perf_counter_cpu_sched_in(cpuctx, cpu); | ||
1342 | if (ctx) | ||
1343 | perf_counter_task_sched_in(curr, cpu); | ||
1344 | } | ||
1345 | |||
1346 | /* | ||
1347 | * Cross CPU call to read the hardware counter | ||
1348 | */ | ||
1349 | static void __read(void *info) | ||
1350 | { | ||
1351 | struct perf_counter *counter = info; | ||
1352 | struct perf_counter_context *ctx = counter->ctx; | ||
1353 | unsigned long flags; | ||
1354 | |||
1355 | local_irq_save(flags); | ||
1356 | if (ctx->is_active) | ||
1357 | update_context_time(ctx); | ||
1358 | counter->pmu->read(counter); | ||
1359 | update_counter_times(counter); | ||
1360 | local_irq_restore(flags); | ||
1361 | } | ||
1362 | |||
1363 | static u64 perf_counter_read(struct perf_counter *counter) | ||
1364 | { | ||
1365 | /* | ||
1366 | * If counter is enabled and currently active on a CPU, update the | ||
1367 | * value in the counter structure: | ||
1368 | */ | ||
1369 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) { | ||
1370 | smp_call_function_single(counter->oncpu, | ||
1371 | __read, counter, 1); | ||
1372 | } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { | ||
1373 | update_counter_times(counter); | ||
1374 | } | ||
1375 | |||
1376 | return atomic64_read(&counter->count); | ||
1377 | } | ||
1378 | |||
1379 | /* | ||
1380 | * Initialize the perf_counter context in a task_struct: | ||
1381 | */ | ||
1382 | static void | ||
1383 | __perf_counter_init_context(struct perf_counter_context *ctx, | ||
1384 | struct task_struct *task) | ||
1385 | { | ||
1386 | memset(ctx, 0, sizeof(*ctx)); | ||
1387 | spin_lock_init(&ctx->lock); | ||
1388 | mutex_init(&ctx->mutex); | ||
1389 | INIT_LIST_HEAD(&ctx->counter_list); | ||
1390 | INIT_LIST_HEAD(&ctx->event_list); | ||
1391 | atomic_set(&ctx->refcount, 1); | ||
1392 | ctx->task = task; | ||
1393 | } | ||
1394 | |||
1395 | static struct perf_counter_context *find_get_context(pid_t pid, int cpu) | ||
1396 | { | ||
1397 | struct perf_counter_context *parent_ctx; | ||
1398 | struct perf_counter_context *ctx; | ||
1399 | struct perf_cpu_context *cpuctx; | ||
1400 | struct task_struct *task; | ||
1401 | unsigned long flags; | ||
1402 | int err; | ||
1403 | |||
1404 | /* | ||
1405 | * If cpu is not a wildcard then this is a percpu counter: | ||
1406 | */ | ||
1407 | if (cpu != -1) { | ||
1408 | /* Must be root to operate on a CPU counter: */ | ||
1409 | if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
1410 | return ERR_PTR(-EACCES); | ||
1411 | |||
1412 | if (cpu < 0 || cpu > num_possible_cpus()) | ||
1413 | return ERR_PTR(-EINVAL); | ||
1414 | |||
1415 | /* | ||
1416 | * We could be clever and allow to attach a counter to an | ||
1417 | * offline CPU and activate it when the CPU comes up, but | ||
1418 | * that's for later. | ||
1419 | */ | ||
1420 | if (!cpu_isset(cpu, cpu_online_map)) | ||
1421 | return ERR_PTR(-ENODEV); | ||
1422 | |||
1423 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
1424 | ctx = &cpuctx->ctx; | ||
1425 | get_ctx(ctx); | ||
1426 | |||
1427 | return ctx; | ||
1428 | } | ||
1429 | |||
1430 | rcu_read_lock(); | ||
1431 | if (!pid) | ||
1432 | task = current; | ||
1433 | else | ||
1434 | task = find_task_by_vpid(pid); | ||
1435 | if (task) | ||
1436 | get_task_struct(task); | ||
1437 | rcu_read_unlock(); | ||
1438 | |||
1439 | if (!task) | ||
1440 | return ERR_PTR(-ESRCH); | ||
1441 | |||
1442 | /* | ||
1443 | * Can't attach counters to a dying task. | ||
1444 | */ | ||
1445 | err = -ESRCH; | ||
1446 | if (task->flags & PF_EXITING) | ||
1447 | goto errout; | ||
1448 | |||
1449 | /* Reuse ptrace permission checks for now. */ | ||
1450 | err = -EACCES; | ||
1451 | if (!ptrace_may_access(task, PTRACE_MODE_READ)) | ||
1452 | goto errout; | ||
1453 | |||
1454 | retry: | ||
1455 | ctx = perf_lock_task_context(task, &flags); | ||
1456 | if (ctx) { | ||
1457 | parent_ctx = ctx->parent_ctx; | ||
1458 | if (parent_ctx) { | ||
1459 | put_ctx(parent_ctx); | ||
1460 | ctx->parent_ctx = NULL; /* no longer a clone */ | ||
1461 | } | ||
1462 | /* | ||
1463 | * Get an extra reference before dropping the lock so that | ||
1464 | * this context won't get freed if the task exits. | ||
1465 | */ | ||
1466 | get_ctx(ctx); | ||
1467 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
1468 | } | ||
1469 | |||
1470 | if (!ctx) { | ||
1471 | ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); | ||
1472 | err = -ENOMEM; | ||
1473 | if (!ctx) | ||
1474 | goto errout; | ||
1475 | __perf_counter_init_context(ctx, task); | ||
1476 | get_ctx(ctx); | ||
1477 | if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { | ||
1478 | /* | ||
1479 | * We raced with some other task; use | ||
1480 | * the context they set. | ||
1481 | */ | ||
1482 | kfree(ctx); | ||
1483 | goto retry; | ||
1484 | } | ||
1485 | get_task_struct(task); | ||
1486 | } | ||
1487 | |||
1488 | put_task_struct(task); | ||
1489 | return ctx; | ||
1490 | |||
1491 | errout: | ||
1492 | put_task_struct(task); | ||
1493 | return ERR_PTR(err); | ||
1494 | } | ||
1495 | |||
1496 | static void free_counter_rcu(struct rcu_head *head) | ||
1497 | { | ||
1498 | struct perf_counter *counter; | ||
1499 | |||
1500 | counter = container_of(head, struct perf_counter, rcu_head); | ||
1501 | if (counter->ns) | ||
1502 | put_pid_ns(counter->ns); | ||
1503 | kfree(counter); | ||
1504 | } | ||
1505 | |||
1506 | static void perf_pending_sync(struct perf_counter *counter); | ||
1507 | |||
1508 | static void free_counter(struct perf_counter *counter) | ||
1509 | { | ||
1510 | perf_pending_sync(counter); | ||
1511 | |||
1512 | atomic_dec(&nr_counters); | ||
1513 | if (counter->attr.mmap) | ||
1514 | atomic_dec(&nr_mmap_counters); | ||
1515 | if (counter->attr.comm) | ||
1516 | atomic_dec(&nr_comm_counters); | ||
1517 | |||
1518 | if (counter->destroy) | ||
1519 | counter->destroy(counter); | ||
1520 | |||
1521 | put_ctx(counter->ctx); | ||
1522 | call_rcu(&counter->rcu_head, free_counter_rcu); | ||
1523 | } | ||
1524 | |||
1525 | /* | ||
1526 | * Called when the last reference to the file is gone. | ||
1527 | */ | ||
1528 | static int perf_release(struct inode *inode, struct file *file) | ||
1529 | { | ||
1530 | struct perf_counter *counter = file->private_data; | ||
1531 | struct perf_counter_context *ctx = counter->ctx; | ||
1532 | |||
1533 | file->private_data = NULL; | ||
1534 | |||
1535 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1536 | mutex_lock(&ctx->mutex); | ||
1537 | perf_counter_remove_from_context(counter); | ||
1538 | mutex_unlock(&ctx->mutex); | ||
1539 | |||
1540 | mutex_lock(&counter->owner->perf_counter_mutex); | ||
1541 | list_del_init(&counter->owner_entry); | ||
1542 | mutex_unlock(&counter->owner->perf_counter_mutex); | ||
1543 | put_task_struct(counter->owner); | ||
1544 | |||
1545 | free_counter(counter); | ||
1546 | |||
1547 | return 0; | ||
1548 | } | ||
1549 | |||
1550 | /* | ||
1551 | * Read the performance counter - simple non blocking version for now | ||
1552 | */ | ||
1553 | static ssize_t | ||
1554 | perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) | ||
1555 | { | ||
1556 | u64 values[3]; | ||
1557 | int n; | ||
1558 | |||
1559 | /* | ||
1560 | * Return end-of-file for a read on a counter that is in | ||
1561 | * error state (i.e. because it was pinned but it couldn't be | ||
1562 | * scheduled on to the CPU at some point). | ||
1563 | */ | ||
1564 | if (counter->state == PERF_COUNTER_STATE_ERROR) | ||
1565 | return 0; | ||
1566 | |||
1567 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1568 | mutex_lock(&counter->child_mutex); | ||
1569 | values[0] = perf_counter_read(counter); | ||
1570 | n = 1; | ||
1571 | if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
1572 | values[n++] = counter->total_time_enabled + | ||
1573 | atomic64_read(&counter->child_total_time_enabled); | ||
1574 | if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
1575 | values[n++] = counter->total_time_running + | ||
1576 | atomic64_read(&counter->child_total_time_running); | ||
1577 | if (counter->attr.read_format & PERF_FORMAT_ID) | ||
1578 | values[n++] = counter->id; | ||
1579 | mutex_unlock(&counter->child_mutex); | ||
1580 | |||
1581 | if (count < n * sizeof(u64)) | ||
1582 | return -EINVAL; | ||
1583 | count = n * sizeof(u64); | ||
1584 | |||
1585 | if (copy_to_user(buf, values, count)) | ||
1586 | return -EFAULT; | ||
1587 | |||
1588 | return count; | ||
1589 | } | ||
1590 | |||
1591 | static ssize_t | ||
1592 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | ||
1593 | { | ||
1594 | struct perf_counter *counter = file->private_data; | ||
1595 | |||
1596 | return perf_read_hw(counter, buf, count); | ||
1597 | } | ||
1598 | |||
1599 | static unsigned int perf_poll(struct file *file, poll_table *wait) | ||
1600 | { | ||
1601 | struct perf_counter *counter = file->private_data; | ||
1602 | struct perf_mmap_data *data; | ||
1603 | unsigned int events = POLL_HUP; | ||
1604 | |||
1605 | rcu_read_lock(); | ||
1606 | data = rcu_dereference(counter->data); | ||
1607 | if (data) | ||
1608 | events = atomic_xchg(&data->poll, 0); | ||
1609 | rcu_read_unlock(); | ||
1610 | |||
1611 | poll_wait(file, &counter->waitq, wait); | ||
1612 | |||
1613 | return events; | ||
1614 | } | ||
1615 | |||
1616 | static void perf_counter_reset(struct perf_counter *counter) | ||
1617 | { | ||
1618 | (void)perf_counter_read(counter); | ||
1619 | atomic64_set(&counter->count, 0); | ||
1620 | perf_counter_update_userpage(counter); | ||
1621 | } | ||
1622 | |||
1623 | static void perf_counter_for_each_sibling(struct perf_counter *counter, | ||
1624 | void (*func)(struct perf_counter *)) | ||
1625 | { | ||
1626 | struct perf_counter_context *ctx = counter->ctx; | ||
1627 | struct perf_counter *sibling; | ||
1628 | |||
1629 | WARN_ON_ONCE(ctx->parent_ctx); | ||
1630 | mutex_lock(&ctx->mutex); | ||
1631 | counter = counter->group_leader; | ||
1632 | |||
1633 | func(counter); | ||
1634 | list_for_each_entry(sibling, &counter->sibling_list, list_entry) | ||
1635 | func(sibling); | ||
1636 | mutex_unlock(&ctx->mutex); | ||
1637 | } | ||
1638 | |||
1639 | /* | ||
1640 | * Holding the top-level counter's child_mutex means that any | ||
1641 | * descendant process that has inherited this counter will block | ||
1642 | * in sync_child_counter if it goes to exit, thus satisfying the | ||
1643 | * task existence requirements of perf_counter_enable/disable. | ||
1644 | */ | ||
1645 | static void perf_counter_for_each_child(struct perf_counter *counter, | ||
1646 | void (*func)(struct perf_counter *)) | ||
1647 | { | ||
1648 | struct perf_counter *child; | ||
1649 | |||
1650 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1651 | mutex_lock(&counter->child_mutex); | ||
1652 | func(counter); | ||
1653 | list_for_each_entry(child, &counter->child_list, child_list) | ||
1654 | func(child); | ||
1655 | mutex_unlock(&counter->child_mutex); | ||
1656 | } | ||
1657 | |||
1658 | static void perf_counter_for_each(struct perf_counter *counter, | ||
1659 | void (*func)(struct perf_counter *)) | ||
1660 | { | ||
1661 | struct perf_counter *child; | ||
1662 | |||
1663 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1664 | mutex_lock(&counter->child_mutex); | ||
1665 | perf_counter_for_each_sibling(counter, func); | ||
1666 | list_for_each_entry(child, &counter->child_list, child_list) | ||
1667 | perf_counter_for_each_sibling(child, func); | ||
1668 | mutex_unlock(&counter->child_mutex); | ||
1669 | } | ||
1670 | |||
1671 | static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) | ||
1672 | { | ||
1673 | struct perf_counter_context *ctx = counter->ctx; | ||
1674 | unsigned long size; | ||
1675 | int ret = 0; | ||
1676 | u64 value; | ||
1677 | |||
1678 | if (!counter->attr.sample_period) | ||
1679 | return -EINVAL; | ||
1680 | |||
1681 | size = copy_from_user(&value, arg, sizeof(value)); | ||
1682 | if (size != sizeof(value)) | ||
1683 | return -EFAULT; | ||
1684 | |||
1685 | if (!value) | ||
1686 | return -EINVAL; | ||
1687 | |||
1688 | spin_lock_irq(&ctx->lock); | ||
1689 | if (counter->attr.freq) { | ||
1690 | if (value > sysctl_perf_counter_sample_rate) { | ||
1691 | ret = -EINVAL; | ||
1692 | goto unlock; | ||
1693 | } | ||
1694 | |||
1695 | counter->attr.sample_freq = value; | ||
1696 | } else { | ||
1697 | perf_log_period(counter, value); | ||
1698 | |||
1699 | counter->attr.sample_period = value; | ||
1700 | counter->hw.sample_period = value; | ||
1701 | } | ||
1702 | unlock: | ||
1703 | spin_unlock_irq(&ctx->lock); | ||
1704 | |||
1705 | return ret; | ||
1706 | } | ||
1707 | |||
1708 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
1709 | { | ||
1710 | struct perf_counter *counter = file->private_data; | ||
1711 | void (*func)(struct perf_counter *); | ||
1712 | u32 flags = arg; | ||
1713 | |||
1714 | switch (cmd) { | ||
1715 | case PERF_COUNTER_IOC_ENABLE: | ||
1716 | func = perf_counter_enable; | ||
1717 | break; | ||
1718 | case PERF_COUNTER_IOC_DISABLE: | ||
1719 | func = perf_counter_disable; | ||
1720 | break; | ||
1721 | case PERF_COUNTER_IOC_RESET: | ||
1722 | func = perf_counter_reset; | ||
1723 | break; | ||
1724 | |||
1725 | case PERF_COUNTER_IOC_REFRESH: | ||
1726 | return perf_counter_refresh(counter, arg); | ||
1727 | |||
1728 | case PERF_COUNTER_IOC_PERIOD: | ||
1729 | return perf_counter_period(counter, (u64 __user *)arg); | ||
1730 | |||
1731 | default: | ||
1732 | return -ENOTTY; | ||
1733 | } | ||
1734 | |||
1735 | if (flags & PERF_IOC_FLAG_GROUP) | ||
1736 | perf_counter_for_each(counter, func); | ||
1737 | else | ||
1738 | perf_counter_for_each_child(counter, func); | ||
1739 | |||
1740 | return 0; | ||
1741 | } | ||
1742 | |||
1743 | int perf_counter_task_enable(void) | ||
1744 | { | ||
1745 | struct perf_counter *counter; | ||
1746 | |||
1747 | mutex_lock(¤t->perf_counter_mutex); | ||
1748 | list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) | ||
1749 | perf_counter_for_each_child(counter, perf_counter_enable); | ||
1750 | mutex_unlock(¤t->perf_counter_mutex); | ||
1751 | |||
1752 | return 0; | ||
1753 | } | ||
1754 | |||
1755 | int perf_counter_task_disable(void) | ||
1756 | { | ||
1757 | struct perf_counter *counter; | ||
1758 | |||
1759 | mutex_lock(¤t->perf_counter_mutex); | ||
1760 | list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) | ||
1761 | perf_counter_for_each_child(counter, perf_counter_disable); | ||
1762 | mutex_unlock(¤t->perf_counter_mutex); | ||
1763 | |||
1764 | return 0; | ||
1765 | } | ||
1766 | |||
1767 | /* | ||
1768 | * Callers need to ensure there can be no nesting of this function, otherwise | ||
1769 | * the seqlock logic goes bad. We can not serialize this because the arch | ||
1770 | * code calls this from NMI context. | ||
1771 | */ | ||
1772 | void perf_counter_update_userpage(struct perf_counter *counter) | ||
1773 | { | ||
1774 | struct perf_counter_mmap_page *userpg; | ||
1775 | struct perf_mmap_data *data; | ||
1776 | |||
1777 | rcu_read_lock(); | ||
1778 | data = rcu_dereference(counter->data); | ||
1779 | if (!data) | ||
1780 | goto unlock; | ||
1781 | |||
1782 | userpg = data->user_page; | ||
1783 | |||
1784 | /* | ||
1785 | * Disable preemption so as to not let the corresponding user-space | ||
1786 | * spin too long if we get preempted. | ||
1787 | */ | ||
1788 | preempt_disable(); | ||
1789 | ++userpg->lock; | ||
1790 | barrier(); | ||
1791 | userpg->index = counter->hw.idx; | ||
1792 | userpg->offset = atomic64_read(&counter->count); | ||
1793 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
1794 | userpg->offset -= atomic64_read(&counter->hw.prev_count); | ||
1795 | |||
1796 | barrier(); | ||
1797 | ++userpg->lock; | ||
1798 | preempt_enable(); | ||
1799 | unlock: | ||
1800 | rcu_read_unlock(); | ||
1801 | } | ||
1802 | |||
1803 | static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | ||
1804 | { | ||
1805 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1806 | struct perf_mmap_data *data; | ||
1807 | int ret = VM_FAULT_SIGBUS; | ||
1808 | |||
1809 | rcu_read_lock(); | ||
1810 | data = rcu_dereference(counter->data); | ||
1811 | if (!data) | ||
1812 | goto unlock; | ||
1813 | |||
1814 | if (vmf->pgoff == 0) { | ||
1815 | vmf->page = virt_to_page(data->user_page); | ||
1816 | } else { | ||
1817 | int nr = vmf->pgoff - 1; | ||
1818 | |||
1819 | if ((unsigned)nr > data->nr_pages) | ||
1820 | goto unlock; | ||
1821 | |||
1822 | vmf->page = virt_to_page(data->data_pages[nr]); | ||
1823 | } | ||
1824 | get_page(vmf->page); | ||
1825 | ret = 0; | ||
1826 | unlock: | ||
1827 | rcu_read_unlock(); | ||
1828 | |||
1829 | return ret; | ||
1830 | } | ||
1831 | |||
1832 | static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) | ||
1833 | { | ||
1834 | struct perf_mmap_data *data; | ||
1835 | unsigned long size; | ||
1836 | int i; | ||
1837 | |||
1838 | WARN_ON(atomic_read(&counter->mmap_count)); | ||
1839 | |||
1840 | size = sizeof(struct perf_mmap_data); | ||
1841 | size += nr_pages * sizeof(void *); | ||
1842 | |||
1843 | data = kzalloc(size, GFP_KERNEL); | ||
1844 | if (!data) | ||
1845 | goto fail; | ||
1846 | |||
1847 | data->user_page = (void *)get_zeroed_page(GFP_KERNEL); | ||
1848 | if (!data->user_page) | ||
1849 | goto fail_user_page; | ||
1850 | |||
1851 | for (i = 0; i < nr_pages; i++) { | ||
1852 | data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | ||
1853 | if (!data->data_pages[i]) | ||
1854 | goto fail_data_pages; | ||
1855 | } | ||
1856 | |||
1857 | data->nr_pages = nr_pages; | ||
1858 | atomic_set(&data->lock, -1); | ||
1859 | |||
1860 | rcu_assign_pointer(counter->data, data); | ||
1861 | |||
1862 | return 0; | ||
1863 | |||
1864 | fail_data_pages: | ||
1865 | for (i--; i >= 0; i--) | ||
1866 | free_page((unsigned long)data->data_pages[i]); | ||
1867 | |||
1868 | free_page((unsigned long)data->user_page); | ||
1869 | |||
1870 | fail_user_page: | ||
1871 | kfree(data); | ||
1872 | |||
1873 | fail: | ||
1874 | return -ENOMEM; | ||
1875 | } | ||
1876 | |||
1877 | static void __perf_mmap_data_free(struct rcu_head *rcu_head) | ||
1878 | { | ||
1879 | struct perf_mmap_data *data; | ||
1880 | int i; | ||
1881 | |||
1882 | data = container_of(rcu_head, struct perf_mmap_data, rcu_head); | ||
1883 | |||
1884 | free_page((unsigned long)data->user_page); | ||
1885 | for (i = 0; i < data->nr_pages; i++) | ||
1886 | free_page((unsigned long)data->data_pages[i]); | ||
1887 | kfree(data); | ||
1888 | } | ||
1889 | |||
1890 | static void perf_mmap_data_free(struct perf_counter *counter) | ||
1891 | { | ||
1892 | struct perf_mmap_data *data = counter->data; | ||
1893 | |||
1894 | WARN_ON(atomic_read(&counter->mmap_count)); | ||
1895 | |||
1896 | rcu_assign_pointer(counter->data, NULL); | ||
1897 | call_rcu(&data->rcu_head, __perf_mmap_data_free); | ||
1898 | } | ||
1899 | |||
1900 | static void perf_mmap_open(struct vm_area_struct *vma) | ||
1901 | { | ||
1902 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1903 | |||
1904 | atomic_inc(&counter->mmap_count); | ||
1905 | } | ||
1906 | |||
1907 | static void perf_mmap_close(struct vm_area_struct *vma) | ||
1908 | { | ||
1909 | struct perf_counter *counter = vma->vm_file->private_data; | ||
1910 | |||
1911 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1912 | if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { | ||
1913 | struct user_struct *user = current_user(); | ||
1914 | |||
1915 | atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); | ||
1916 | vma->vm_mm->locked_vm -= counter->data->nr_locked; | ||
1917 | perf_mmap_data_free(counter); | ||
1918 | mutex_unlock(&counter->mmap_mutex); | ||
1919 | } | ||
1920 | } | ||
1921 | |||
1922 | static struct vm_operations_struct perf_mmap_vmops = { | ||
1923 | .open = perf_mmap_open, | ||
1924 | .close = perf_mmap_close, | ||
1925 | .fault = perf_mmap_fault, | ||
1926 | }; | ||
1927 | |||
1928 | static int perf_mmap(struct file *file, struct vm_area_struct *vma) | ||
1929 | { | ||
1930 | struct perf_counter *counter = file->private_data; | ||
1931 | unsigned long user_locked, user_lock_limit; | ||
1932 | struct user_struct *user = current_user(); | ||
1933 | unsigned long locked, lock_limit; | ||
1934 | unsigned long vma_size; | ||
1935 | unsigned long nr_pages; | ||
1936 | long user_extra, extra; | ||
1937 | int ret = 0; | ||
1938 | |||
1939 | if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) | ||
1940 | return -EINVAL; | ||
1941 | |||
1942 | vma_size = vma->vm_end - vma->vm_start; | ||
1943 | nr_pages = (vma_size / PAGE_SIZE) - 1; | ||
1944 | |||
1945 | /* | ||
1946 | * If we have data pages ensure they're a power-of-two number, so we | ||
1947 | * can do bitmasks instead of modulo. | ||
1948 | */ | ||
1949 | if (nr_pages != 0 && !is_power_of_2(nr_pages)) | ||
1950 | return -EINVAL; | ||
1951 | |||
1952 | if (vma_size != PAGE_SIZE * (1 + nr_pages)) | ||
1953 | return -EINVAL; | ||
1954 | |||
1955 | if (vma->vm_pgoff != 0) | ||
1956 | return -EINVAL; | ||
1957 | |||
1958 | WARN_ON_ONCE(counter->ctx->parent_ctx); | ||
1959 | mutex_lock(&counter->mmap_mutex); | ||
1960 | if (atomic_inc_not_zero(&counter->mmap_count)) { | ||
1961 | if (nr_pages != counter->data->nr_pages) | ||
1962 | ret = -EINVAL; | ||
1963 | goto unlock; | ||
1964 | } | ||
1965 | |||
1966 | user_extra = nr_pages + 1; | ||
1967 | user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); | ||
1968 | |||
1969 | /* | ||
1970 | * Increase the limit linearly with more CPUs: | ||
1971 | */ | ||
1972 | user_lock_limit *= num_online_cpus(); | ||
1973 | |||
1974 | user_locked = atomic_long_read(&user->locked_vm) + user_extra; | ||
1975 | |||
1976 | extra = 0; | ||
1977 | if (user_locked > user_lock_limit) | ||
1978 | extra = user_locked - user_lock_limit; | ||
1979 | |||
1980 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | ||
1981 | lock_limit >>= PAGE_SHIFT; | ||
1982 | locked = vma->vm_mm->locked_vm + extra; | ||
1983 | |||
1984 | if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { | ||
1985 | ret = -EPERM; | ||
1986 | goto unlock; | ||
1987 | } | ||
1988 | |||
1989 | WARN_ON(counter->data); | ||
1990 | ret = perf_mmap_data_alloc(counter, nr_pages); | ||
1991 | if (ret) | ||
1992 | goto unlock; | ||
1993 | |||
1994 | atomic_set(&counter->mmap_count, 1); | ||
1995 | atomic_long_add(user_extra, &user->locked_vm); | ||
1996 | vma->vm_mm->locked_vm += extra; | ||
1997 | counter->data->nr_locked = extra; | ||
1998 | unlock: | ||
1999 | mutex_unlock(&counter->mmap_mutex); | ||
2000 | |||
2001 | vma->vm_flags &= ~VM_MAYWRITE; | ||
2002 | vma->vm_flags |= VM_RESERVED; | ||
2003 | vma->vm_ops = &perf_mmap_vmops; | ||
2004 | |||
2005 | return ret; | ||
2006 | } | ||
2007 | |||
2008 | static int perf_fasync(int fd, struct file *filp, int on) | ||
2009 | { | ||
2010 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
2011 | struct perf_counter *counter = filp->private_data; | ||
2012 | int retval; | ||
2013 | |||
2014 | mutex_lock(&inode->i_mutex); | ||
2015 | retval = fasync_helper(fd, filp, on, &counter->fasync); | ||
2016 | mutex_unlock(&inode->i_mutex); | ||
2017 | |||
2018 | if (retval < 0) | ||
2019 | return retval; | ||
2020 | |||
2021 | return 0; | ||
2022 | } | ||
2023 | |||
2024 | static const struct file_operations perf_fops = { | ||
2025 | .release = perf_release, | ||
2026 | .read = perf_read, | ||
2027 | .poll = perf_poll, | ||
2028 | .unlocked_ioctl = perf_ioctl, | ||
2029 | .compat_ioctl = perf_ioctl, | ||
2030 | .mmap = perf_mmap, | ||
2031 | .fasync = perf_fasync, | ||
2032 | }; | ||
2033 | |||
2034 | /* | ||
2035 | * Perf counter wakeup | ||
2036 | * | ||
2037 | * If there's data, ensure we set the poll() state and publish everything | ||
2038 | * to user-space before waking everybody up. | ||
2039 | */ | ||
2040 | |||
2041 | void perf_counter_wakeup(struct perf_counter *counter) | ||
2042 | { | ||
2043 | wake_up_all(&counter->waitq); | ||
2044 | |||
2045 | if (counter->pending_kill) { | ||
2046 | kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); | ||
2047 | counter->pending_kill = 0; | ||
2048 | } | ||
2049 | } | ||
2050 | |||
2051 | /* | ||
2052 | * Pending wakeups | ||
2053 | * | ||
2054 | * Handle the case where we need to wakeup up from NMI (or rq->lock) context. | ||
2055 | * | ||
2056 | * The NMI bit means we cannot possibly take locks. Therefore, maintain a | ||
2057 | * single linked list and use cmpxchg() to add entries lockless. | ||
2058 | */ | ||
2059 | |||
2060 | static void perf_pending_counter(struct perf_pending_entry *entry) | ||
2061 | { | ||
2062 | struct perf_counter *counter = container_of(entry, | ||
2063 | struct perf_counter, pending); | ||
2064 | |||
2065 | if (counter->pending_disable) { | ||
2066 | counter->pending_disable = 0; | ||
2067 | perf_counter_disable(counter); | ||
2068 | } | ||
2069 | |||
2070 | if (counter->pending_wakeup) { | ||
2071 | counter->pending_wakeup = 0; | ||
2072 | perf_counter_wakeup(counter); | ||
2073 | } | ||
2074 | } | ||
2075 | |||
2076 | #define PENDING_TAIL ((struct perf_pending_entry *)-1UL) | ||
2077 | |||
2078 | static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { | ||
2079 | PENDING_TAIL, | ||
2080 | }; | ||
2081 | |||
2082 | static void perf_pending_queue(struct perf_pending_entry *entry, | ||
2083 | void (*func)(struct perf_pending_entry *)) | ||
2084 | { | ||
2085 | struct perf_pending_entry **head; | ||
2086 | |||
2087 | if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) | ||
2088 | return; | ||
2089 | |||
2090 | entry->func = func; | ||
2091 | |||
2092 | head = &get_cpu_var(perf_pending_head); | ||
2093 | |||
2094 | do { | ||
2095 | entry->next = *head; | ||
2096 | } while (cmpxchg(head, entry->next, entry) != entry->next); | ||
2097 | |||
2098 | set_perf_counter_pending(); | ||
2099 | |||
2100 | put_cpu_var(perf_pending_head); | ||
2101 | } | ||
2102 | |||
2103 | static int __perf_pending_run(void) | ||
2104 | { | ||
2105 | struct perf_pending_entry *list; | ||
2106 | int nr = 0; | ||
2107 | |||
2108 | list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); | ||
2109 | while (list != PENDING_TAIL) { | ||
2110 | void (*func)(struct perf_pending_entry *); | ||
2111 | struct perf_pending_entry *entry = list; | ||
2112 | |||
2113 | list = list->next; | ||
2114 | |||
2115 | func = entry->func; | ||
2116 | entry->next = NULL; | ||
2117 | /* | ||
2118 | * Ensure we observe the unqueue before we issue the wakeup, | ||
2119 | * so that we won't be waiting forever. | ||
2120 | * -- see perf_not_pending(). | ||
2121 | */ | ||
2122 | smp_wmb(); | ||
2123 | |||
2124 | func(entry); | ||
2125 | nr++; | ||
2126 | } | ||
2127 | |||
2128 | return nr; | ||
2129 | } | ||
2130 | |||
2131 | static inline int perf_not_pending(struct perf_counter *counter) | ||
2132 | { | ||
2133 | /* | ||
2134 | * If we flush on whatever cpu we run, there is a chance we don't | ||
2135 | * need to wait. | ||
2136 | */ | ||
2137 | get_cpu(); | ||
2138 | __perf_pending_run(); | ||
2139 | put_cpu(); | ||
2140 | |||
2141 | /* | ||
2142 | * Ensure we see the proper queue state before going to sleep | ||
2143 | * so that we do not miss the wakeup. -- see perf_pending_handle() | ||
2144 | */ | ||
2145 | smp_rmb(); | ||
2146 | return counter->pending.next == NULL; | ||
2147 | } | ||
2148 | |||
2149 | static void perf_pending_sync(struct perf_counter *counter) | ||
2150 | { | ||
2151 | wait_event(counter->waitq, perf_not_pending(counter)); | ||
2152 | } | ||
2153 | |||
/*
 * Process this CPU's pending list; the number of handled entries is not
 * reported to the caller.
 */
void perf_counter_do_pending(void)
{
	(void)__perf_pending_run();
}
2158 | |||
2159 | /* | ||
2160 | * Callchain support -- arch specific | ||
2161 | */ | ||
2162 | |||
2163 | __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
2164 | { | ||
2165 | return NULL; | ||
2166 | } | ||
2167 | |||
/*
 * Output
 */

/*
 * State of one record being written to a counter's mmap buffer, filled in
 * by perf_output_begin() and consumed by perf_output_copy()/_end().
 */
struct perf_output_handle {
	struct perf_counter	*counter;	/* counter that owns the buffer */
	struct perf_mmap_data	*data;		/* the buffer itself */
	unsigned long		head;		/* end of the reserved window */
	unsigned long		offset;		/* current write position */
	int			nmi;		/* wakeups must be deferred */
	int			overflow;	/* record counts as an overflow event */
	int			locked;		/* this handle took data->lock */
	unsigned long		flags;		/* saved interrupt state */
};
2182 | |||
2183 | static void perf_output_wakeup(struct perf_output_handle *handle) | ||
2184 | { | ||
2185 | atomic_set(&handle->data->poll, POLL_IN); | ||
2186 | |||
2187 | if (handle->nmi) { | ||
2188 | handle->counter->pending_wakeup = 1; | ||
2189 | perf_pending_queue(&handle->counter->pending, | ||
2190 | perf_pending_counter); | ||
2191 | } else | ||
2192 | perf_counter_wakeup(handle->counter); | ||
2193 | } | ||
2194 | |||
2195 | /* | ||
2196 | * Curious locking construct. | ||
2197 | * | ||
2198 | * We need to ensure a later event doesn't publish a head when a former | ||
2199 | * event isn't done writing. However since we need to deal with NMIs we | ||
2200 | * cannot fully serialize things. | ||
2201 | * | ||
2202 | * What we do is serialize between CPUs so we only have to deal with NMI | ||
2203 | * nesting on a single CPU. | ||
2204 | * | ||
2205 | * We only publish the head (and generate a wakeup) when the outer-most | ||
2206 | * event completes. | ||
2207 | */ | ||
2208 | static void perf_output_lock(struct perf_output_handle *handle) | ||
2209 | { | ||
2210 | struct perf_mmap_data *data = handle->data; | ||
2211 | int cpu; | ||
2212 | |||
2213 | handle->locked = 0; | ||
2214 | |||
2215 | local_irq_save(handle->flags); | ||
2216 | cpu = smp_processor_id(); | ||
2217 | |||
2218 | if (in_nmi() && atomic_read(&data->lock) == cpu) | ||
2219 | return; | ||
2220 | |||
2221 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2222 | cpu_relax(); | ||
2223 | |||
2224 | handle->locked = 1; | ||
2225 | } | ||
2226 | |||
2227 | static void perf_output_unlock(struct perf_output_handle *handle) | ||
2228 | { | ||
2229 | struct perf_mmap_data *data = handle->data; | ||
2230 | unsigned long head; | ||
2231 | int cpu; | ||
2232 | |||
2233 | data->done_head = data->head; | ||
2234 | |||
2235 | if (!handle->locked) | ||
2236 | goto out; | ||
2237 | |||
2238 | again: | ||
2239 | /* | ||
2240 | * The xchg implies a full barrier that ensures all writes are done | ||
2241 | * before we publish the new head, matched by a rmb() in userspace when | ||
2242 | * reading this position. | ||
2243 | */ | ||
2244 | while ((head = atomic_long_xchg(&data->done_head, 0))) | ||
2245 | data->user_page->data_head = head; | ||
2246 | |||
2247 | /* | ||
2248 | * NMI can happen here, which means we can miss a done_head update. | ||
2249 | */ | ||
2250 | |||
2251 | cpu = atomic_xchg(&data->lock, -1); | ||
2252 | WARN_ON_ONCE(cpu != smp_processor_id()); | ||
2253 | |||
2254 | /* | ||
2255 | * Therefore we have to validate we did not indeed do so. | ||
2256 | */ | ||
2257 | if (unlikely(atomic_long_read(&data->done_head))) { | ||
2258 | /* | ||
2259 | * Since we had it locked, we can lock it again. | ||
2260 | */ | ||
2261 | while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) | ||
2262 | cpu_relax(); | ||
2263 | |||
2264 | goto again; | ||
2265 | } | ||
2266 | |||
2267 | if (atomic_xchg(&data->wakeup, 0)) | ||
2268 | perf_output_wakeup(handle); | ||
2269 | out: | ||
2270 | local_irq_restore(handle->flags); | ||
2271 | } | ||
2272 | |||
2273 | static int perf_output_begin(struct perf_output_handle *handle, | ||
2274 | struct perf_counter *counter, unsigned int size, | ||
2275 | int nmi, int overflow) | ||
2276 | { | ||
2277 | struct perf_mmap_data *data; | ||
2278 | unsigned int offset, head; | ||
2279 | |||
2280 | /* | ||
2281 | * For inherited counters we send all the output towards the parent. | ||
2282 | */ | ||
2283 | if (counter->parent) | ||
2284 | counter = counter->parent; | ||
2285 | |||
2286 | rcu_read_lock(); | ||
2287 | data = rcu_dereference(counter->data); | ||
2288 | if (!data) | ||
2289 | goto out; | ||
2290 | |||
2291 | handle->data = data; | ||
2292 | handle->counter = counter; | ||
2293 | handle->nmi = nmi; | ||
2294 | handle->overflow = overflow; | ||
2295 | |||
2296 | if (!data->nr_pages) | ||
2297 | goto fail; | ||
2298 | |||
2299 | perf_output_lock(handle); | ||
2300 | |||
2301 | do { | ||
2302 | offset = head = atomic_long_read(&data->head); | ||
2303 | head += size; | ||
2304 | } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); | ||
2305 | |||
2306 | handle->offset = offset; | ||
2307 | handle->head = head; | ||
2308 | |||
2309 | if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) | ||
2310 | atomic_set(&data->wakeup, 1); | ||
2311 | |||
2312 | return 0; | ||
2313 | |||
2314 | fail: | ||
2315 | perf_output_wakeup(handle); | ||
2316 | out: | ||
2317 | rcu_read_unlock(); | ||
2318 | |||
2319 | return -ENOSPC; | ||
2320 | } | ||
2321 | |||
2322 | static void perf_output_copy(struct perf_output_handle *handle, | ||
2323 | const void *buf, unsigned int len) | ||
2324 | { | ||
2325 | unsigned int pages_mask; | ||
2326 | unsigned int offset; | ||
2327 | unsigned int size; | ||
2328 | void **pages; | ||
2329 | |||
2330 | offset = handle->offset; | ||
2331 | pages_mask = handle->data->nr_pages - 1; | ||
2332 | pages = handle->data->data_pages; | ||
2333 | |||
2334 | do { | ||
2335 | unsigned int page_offset; | ||
2336 | int nr; | ||
2337 | |||
2338 | nr = (offset >> PAGE_SHIFT) & pages_mask; | ||
2339 | page_offset = offset & (PAGE_SIZE - 1); | ||
2340 | size = min_t(unsigned int, PAGE_SIZE - page_offset, len); | ||
2341 | |||
2342 | memcpy(pages[nr] + page_offset, buf, size); | ||
2343 | |||
2344 | len -= size; | ||
2345 | buf += size; | ||
2346 | offset += size; | ||
2347 | } while (len); | ||
2348 | |||
2349 | handle->offset = offset; | ||
2350 | |||
2351 | /* | ||
2352 | * Check we didn't copy past our reservation window, taking the | ||
2353 | * possible unsigned int wrap into account. | ||
2354 | */ | ||
2355 | WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); | ||
2356 | } | ||
2357 | |||
/* Emit the object @x verbatim (all sizeof(x) bytes of it) into @handle. */
#define perf_output_put(handle, x)					\
	perf_output_copy((handle), &(x), sizeof(x))
2360 | |||
2361 | static void perf_output_end(struct perf_output_handle *handle) | ||
2362 | { | ||
2363 | struct perf_counter *counter = handle->counter; | ||
2364 | struct perf_mmap_data *data = handle->data; | ||
2365 | |||
2366 | int wakeup_events = counter->attr.wakeup_events; | ||
2367 | |||
2368 | if (handle->overflow && wakeup_events) { | ||
2369 | int events = atomic_inc_return(&data->events); | ||
2370 | if (events >= wakeup_events) { | ||
2371 | atomic_sub(wakeup_events, &data->events); | ||
2372 | atomic_set(&data->wakeup, 1); | ||
2373 | } | ||
2374 | } | ||
2375 | |||
2376 | perf_output_unlock(handle); | ||
2377 | rcu_read_unlock(); | ||
2378 | } | ||
2379 | |||
2380 | static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) | ||
2381 | { | ||
2382 | /* | ||
2383 | * only top level counters have the pid namespace they were created in | ||
2384 | */ | ||
2385 | if (counter->parent) | ||
2386 | counter = counter->parent; | ||
2387 | |||
2388 | return task_tgid_nr_ns(p, counter->ns); | ||
2389 | } | ||
2390 | |||
2391 | static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) | ||
2392 | { | ||
2393 | /* | ||
2394 | * only top level counters have the pid namespace they were created in | ||
2395 | */ | ||
2396 | if (counter->parent) | ||
2397 | counter = counter->parent; | ||
2398 | |||
2399 | return task_pid_nr_ns(p, counter->ns); | ||
2400 | } | ||
2401 | |||
2402 | static void perf_counter_output(struct perf_counter *counter, int nmi, | ||
2403 | struct perf_sample_data *data) | ||
2404 | { | ||
2405 | int ret; | ||
2406 | u64 sample_type = counter->attr.sample_type; | ||
2407 | struct perf_output_handle handle; | ||
2408 | struct perf_event_header header; | ||
2409 | u64 ip; | ||
2410 | struct { | ||
2411 | u32 pid, tid; | ||
2412 | } tid_entry; | ||
2413 | struct { | ||
2414 | u64 id; | ||
2415 | u64 counter; | ||
2416 | } group_entry; | ||
2417 | struct perf_callchain_entry *callchain = NULL; | ||
2418 | int callchain_size = 0; | ||
2419 | u64 time; | ||
2420 | struct { | ||
2421 | u32 cpu, reserved; | ||
2422 | } cpu_entry; | ||
2423 | |||
2424 | header.type = 0; | ||
2425 | header.size = sizeof(header); | ||
2426 | |||
2427 | header.misc = PERF_EVENT_MISC_OVERFLOW; | ||
2428 | header.misc |= perf_misc_flags(data->regs); | ||
2429 | |||
2430 | if (sample_type & PERF_SAMPLE_IP) { | ||
2431 | ip = perf_instruction_pointer(data->regs); | ||
2432 | header.type |= PERF_SAMPLE_IP; | ||
2433 | header.size += sizeof(ip); | ||
2434 | } | ||
2435 | |||
2436 | if (sample_type & PERF_SAMPLE_TID) { | ||
2437 | /* namespace issues */ | ||
2438 | tid_entry.pid = perf_counter_pid(counter, current); | ||
2439 | tid_entry.tid = perf_counter_tid(counter, current); | ||
2440 | |||
2441 | header.type |= PERF_SAMPLE_TID; | ||
2442 | header.size += sizeof(tid_entry); | ||
2443 | } | ||
2444 | |||
2445 | if (sample_type & PERF_SAMPLE_TIME) { | ||
2446 | /* | ||
2447 | * Maybe do better on x86 and provide cpu_clock_nmi() | ||
2448 | */ | ||
2449 | time = sched_clock(); | ||
2450 | |||
2451 | header.type |= PERF_SAMPLE_TIME; | ||
2452 | header.size += sizeof(u64); | ||
2453 | } | ||
2454 | |||
2455 | if (sample_type & PERF_SAMPLE_ADDR) { | ||
2456 | header.type |= PERF_SAMPLE_ADDR; | ||
2457 | header.size += sizeof(u64); | ||
2458 | } | ||
2459 | |||
2460 | if (sample_type & PERF_SAMPLE_ID) { | ||
2461 | header.type |= PERF_SAMPLE_ID; | ||
2462 | header.size += sizeof(u64); | ||
2463 | } | ||
2464 | |||
2465 | if (sample_type & PERF_SAMPLE_CPU) { | ||
2466 | header.type |= PERF_SAMPLE_CPU; | ||
2467 | header.size += sizeof(cpu_entry); | ||
2468 | |||
2469 | cpu_entry.cpu = raw_smp_processor_id(); | ||
2470 | } | ||
2471 | |||
2472 | if (sample_type & PERF_SAMPLE_PERIOD) { | ||
2473 | header.type |= PERF_SAMPLE_PERIOD; | ||
2474 | header.size += sizeof(u64); | ||
2475 | } | ||
2476 | |||
2477 | if (sample_type & PERF_SAMPLE_GROUP) { | ||
2478 | header.type |= PERF_SAMPLE_GROUP; | ||
2479 | header.size += sizeof(u64) + | ||
2480 | counter->nr_siblings * sizeof(group_entry); | ||
2481 | } | ||
2482 | |||
2483 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | ||
2484 | callchain = perf_callchain(data->regs); | ||
2485 | |||
2486 | if (callchain) { | ||
2487 | callchain_size = (1 + callchain->nr) * sizeof(u64); | ||
2488 | |||
2489 | header.type |= PERF_SAMPLE_CALLCHAIN; | ||
2490 | header.size += callchain_size; | ||
2491 | } | ||
2492 | } | ||
2493 | |||
2494 | ret = perf_output_begin(&handle, counter, header.size, nmi, 1); | ||
2495 | if (ret) | ||
2496 | return; | ||
2497 | |||
2498 | perf_output_put(&handle, header); | ||
2499 | |||
2500 | if (sample_type & PERF_SAMPLE_IP) | ||
2501 | perf_output_put(&handle, ip); | ||
2502 | |||
2503 | if (sample_type & PERF_SAMPLE_TID) | ||
2504 | perf_output_put(&handle, tid_entry); | ||
2505 | |||
2506 | if (sample_type & PERF_SAMPLE_TIME) | ||
2507 | perf_output_put(&handle, time); | ||
2508 | |||
2509 | if (sample_type & PERF_SAMPLE_ADDR) | ||
2510 | perf_output_put(&handle, data->addr); | ||
2511 | |||
2512 | if (sample_type & PERF_SAMPLE_ID) | ||
2513 | perf_output_put(&handle, counter->id); | ||
2514 | |||
2515 | if (sample_type & PERF_SAMPLE_CPU) | ||
2516 | perf_output_put(&handle, cpu_entry); | ||
2517 | |||
2518 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
2519 | perf_output_put(&handle, data->period); | ||
2520 | |||
2521 | /* | ||
2522 | * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. | ||
2523 | */ | ||
2524 | if (sample_type & PERF_SAMPLE_GROUP) { | ||
2525 | struct perf_counter *leader, *sub; | ||
2526 | u64 nr = counter->nr_siblings; | ||
2527 | |||
2528 | perf_output_put(&handle, nr); | ||
2529 | |||
2530 | leader = counter->group_leader; | ||
2531 | list_for_each_entry(sub, &leader->sibling_list, list_entry) { | ||
2532 | if (sub != counter) | ||
2533 | sub->pmu->read(sub); | ||
2534 | |||
2535 | group_entry.id = sub->id; | ||
2536 | group_entry.counter = atomic64_read(&sub->count); | ||
2537 | |||
2538 | perf_output_put(&handle, group_entry); | ||
2539 | } | ||
2540 | } | ||
2541 | |||
2542 | if (callchain) | ||
2543 | perf_output_copy(&handle, callchain, callchain_size); | ||
2544 | |||
2545 | perf_output_end(&handle); | ||
2546 | } | ||
2547 | |||
2548 | /* | ||
2549 | * fork tracking | ||
2550 | */ | ||
2551 | |||
2552 | struct perf_fork_event { | ||
2553 | struct task_struct *task; | ||
2554 | |||
2555 | struct { | ||
2556 | struct perf_event_header header; | ||
2557 | |||
2558 | u32 pid; | ||
2559 | u32 ppid; | ||
2560 | } event; | ||
2561 | }; | ||
2562 | |||
2563 | static void perf_counter_fork_output(struct perf_counter *counter, | ||
2564 | struct perf_fork_event *fork_event) | ||
2565 | { | ||
2566 | struct perf_output_handle handle; | ||
2567 | int size = fork_event->event.header.size; | ||
2568 | struct task_struct *task = fork_event->task; | ||
2569 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2570 | |||
2571 | if (ret) | ||
2572 | return; | ||
2573 | |||
2574 | fork_event->event.pid = perf_counter_pid(counter, task); | ||
2575 | fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); | ||
2576 | |||
2577 | perf_output_put(&handle, fork_event->event); | ||
2578 | perf_output_end(&handle); | ||
2579 | } | ||
2580 | |||
2581 | static int perf_counter_fork_match(struct perf_counter *counter) | ||
2582 | { | ||
2583 | if (counter->attr.comm || counter->attr.mmap) | ||
2584 | return 1; | ||
2585 | |||
2586 | return 0; | ||
2587 | } | ||
2588 | |||
2589 | static void perf_counter_fork_ctx(struct perf_counter_context *ctx, | ||
2590 | struct perf_fork_event *fork_event) | ||
2591 | { | ||
2592 | struct perf_counter *counter; | ||
2593 | |||
2594 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2595 | return; | ||
2596 | |||
2597 | rcu_read_lock(); | ||
2598 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2599 | if (perf_counter_fork_match(counter)) | ||
2600 | perf_counter_fork_output(counter, fork_event); | ||
2601 | } | ||
2602 | rcu_read_unlock(); | ||
2603 | } | ||
2604 | |||
2605 | static void perf_counter_fork_event(struct perf_fork_event *fork_event) | ||
2606 | { | ||
2607 | struct perf_cpu_context *cpuctx; | ||
2608 | struct perf_counter_context *ctx; | ||
2609 | |||
2610 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2611 | perf_counter_fork_ctx(&cpuctx->ctx, fork_event); | ||
2612 | put_cpu_var(perf_cpu_context); | ||
2613 | |||
2614 | rcu_read_lock(); | ||
2615 | /* | ||
2616 | * doesn't really matter which of the child contexts the | ||
2617 | * events ends up in. | ||
2618 | */ | ||
2619 | ctx = rcu_dereference(current->perf_counter_ctxp); | ||
2620 | if (ctx) | ||
2621 | perf_counter_fork_ctx(ctx, fork_event); | ||
2622 | rcu_read_unlock(); | ||
2623 | } | ||
2624 | |||
2625 | void perf_counter_fork(struct task_struct *task) | ||
2626 | { | ||
2627 | struct perf_fork_event fork_event; | ||
2628 | |||
2629 | if (!atomic_read(&nr_comm_counters) && | ||
2630 | !atomic_read(&nr_mmap_counters)) | ||
2631 | return; | ||
2632 | |||
2633 | fork_event = (struct perf_fork_event){ | ||
2634 | .task = task, | ||
2635 | .event = { | ||
2636 | .header = { | ||
2637 | .type = PERF_EVENT_FORK, | ||
2638 | .size = sizeof(fork_event.event), | ||
2639 | }, | ||
2640 | }, | ||
2641 | }; | ||
2642 | |||
2643 | perf_counter_fork_event(&fork_event); | ||
2644 | } | ||
2645 | |||
2646 | /* | ||
2647 | * comm tracking | ||
2648 | */ | ||
2649 | |||
2650 | struct perf_comm_event { | ||
2651 | struct task_struct *task; | ||
2652 | char *comm; | ||
2653 | int comm_size; | ||
2654 | |||
2655 | struct { | ||
2656 | struct perf_event_header header; | ||
2657 | |||
2658 | u32 pid; | ||
2659 | u32 tid; | ||
2660 | } event; | ||
2661 | }; | ||
2662 | |||
2663 | static void perf_counter_comm_output(struct perf_counter *counter, | ||
2664 | struct perf_comm_event *comm_event) | ||
2665 | { | ||
2666 | struct perf_output_handle handle; | ||
2667 | int size = comm_event->event.header.size; | ||
2668 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2669 | |||
2670 | if (ret) | ||
2671 | return; | ||
2672 | |||
2673 | comm_event->event.pid = perf_counter_pid(counter, comm_event->task); | ||
2674 | comm_event->event.tid = perf_counter_tid(counter, comm_event->task); | ||
2675 | |||
2676 | perf_output_put(&handle, comm_event->event); | ||
2677 | perf_output_copy(&handle, comm_event->comm, | ||
2678 | comm_event->comm_size); | ||
2679 | perf_output_end(&handle); | ||
2680 | } | ||
2681 | |||
2682 | static int perf_counter_comm_match(struct perf_counter *counter) | ||
2683 | { | ||
2684 | if (counter->attr.comm) | ||
2685 | return 1; | ||
2686 | |||
2687 | return 0; | ||
2688 | } | ||
2689 | |||
2690 | static void perf_counter_comm_ctx(struct perf_counter_context *ctx, | ||
2691 | struct perf_comm_event *comm_event) | ||
2692 | { | ||
2693 | struct perf_counter *counter; | ||
2694 | |||
2695 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2696 | return; | ||
2697 | |||
2698 | rcu_read_lock(); | ||
2699 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2700 | if (perf_counter_comm_match(counter)) | ||
2701 | perf_counter_comm_output(counter, comm_event); | ||
2702 | } | ||
2703 | rcu_read_unlock(); | ||
2704 | } | ||
2705 | |||
2706 | static void perf_counter_comm_event(struct perf_comm_event *comm_event) | ||
2707 | { | ||
2708 | struct perf_cpu_context *cpuctx; | ||
2709 | struct perf_counter_context *ctx; | ||
2710 | unsigned int size; | ||
2711 | char *comm = comm_event->task->comm; | ||
2712 | |||
2713 | size = ALIGN(strlen(comm)+1, sizeof(u64)); | ||
2714 | |||
2715 | comm_event->comm = comm; | ||
2716 | comm_event->comm_size = size; | ||
2717 | |||
2718 | comm_event->event.header.size = sizeof(comm_event->event) + size; | ||
2719 | |||
2720 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2721 | perf_counter_comm_ctx(&cpuctx->ctx, comm_event); | ||
2722 | put_cpu_var(perf_cpu_context); | ||
2723 | |||
2724 | rcu_read_lock(); | ||
2725 | /* | ||
2726 | * doesn't really matter which of the child contexts the | ||
2727 | * events ends up in. | ||
2728 | */ | ||
2729 | ctx = rcu_dereference(current->perf_counter_ctxp); | ||
2730 | if (ctx) | ||
2731 | perf_counter_comm_ctx(ctx, comm_event); | ||
2732 | rcu_read_unlock(); | ||
2733 | } | ||
2734 | |||
2735 | void perf_counter_comm(struct task_struct *task) | ||
2736 | { | ||
2737 | struct perf_comm_event comm_event; | ||
2738 | |||
2739 | if (!atomic_read(&nr_comm_counters)) | ||
2740 | return; | ||
2741 | |||
2742 | comm_event = (struct perf_comm_event){ | ||
2743 | .task = task, | ||
2744 | .event = { | ||
2745 | .header = { .type = PERF_EVENT_COMM, }, | ||
2746 | }, | ||
2747 | }; | ||
2748 | |||
2749 | perf_counter_comm_event(&comm_event); | ||
2750 | } | ||
2751 | |||
2752 | /* | ||
2753 | * mmap tracking | ||
2754 | */ | ||
2755 | |||
2756 | struct perf_mmap_event { | ||
2757 | struct vm_area_struct *vma; | ||
2758 | |||
2759 | const char *file_name; | ||
2760 | int file_size; | ||
2761 | |||
2762 | struct { | ||
2763 | struct perf_event_header header; | ||
2764 | |||
2765 | u32 pid; | ||
2766 | u32 tid; | ||
2767 | u64 start; | ||
2768 | u64 len; | ||
2769 | u64 pgoff; | ||
2770 | } event; | ||
2771 | }; | ||
2772 | |||
2773 | static void perf_counter_mmap_output(struct perf_counter *counter, | ||
2774 | struct perf_mmap_event *mmap_event) | ||
2775 | { | ||
2776 | struct perf_output_handle handle; | ||
2777 | int size = mmap_event->event.header.size; | ||
2778 | int ret = perf_output_begin(&handle, counter, size, 0, 0); | ||
2779 | |||
2780 | if (ret) | ||
2781 | return; | ||
2782 | |||
2783 | mmap_event->event.pid = perf_counter_pid(counter, current); | ||
2784 | mmap_event->event.tid = perf_counter_tid(counter, current); | ||
2785 | |||
2786 | perf_output_put(&handle, mmap_event->event); | ||
2787 | perf_output_copy(&handle, mmap_event->file_name, | ||
2788 | mmap_event->file_size); | ||
2789 | perf_output_end(&handle); | ||
2790 | } | ||
2791 | |||
2792 | static int perf_counter_mmap_match(struct perf_counter *counter, | ||
2793 | struct perf_mmap_event *mmap_event) | ||
2794 | { | ||
2795 | if (counter->attr.mmap) | ||
2796 | return 1; | ||
2797 | |||
2798 | return 0; | ||
2799 | } | ||
2800 | |||
2801 | static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, | ||
2802 | struct perf_mmap_event *mmap_event) | ||
2803 | { | ||
2804 | struct perf_counter *counter; | ||
2805 | |||
2806 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
2807 | return; | ||
2808 | |||
2809 | rcu_read_lock(); | ||
2810 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
2811 | if (perf_counter_mmap_match(counter, mmap_event)) | ||
2812 | perf_counter_mmap_output(counter, mmap_event); | ||
2813 | } | ||
2814 | rcu_read_unlock(); | ||
2815 | } | ||
2816 | |||
2817 | static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) | ||
2818 | { | ||
2819 | struct perf_cpu_context *cpuctx; | ||
2820 | struct perf_counter_context *ctx; | ||
2821 | struct vm_area_struct *vma = mmap_event->vma; | ||
2822 | struct file *file = vma->vm_file; | ||
2823 | unsigned int size; | ||
2824 | char tmp[16]; | ||
2825 | char *buf = NULL; | ||
2826 | const char *name; | ||
2827 | |||
2828 | if (file) { | ||
2829 | buf = kzalloc(PATH_MAX, GFP_KERNEL); | ||
2830 | if (!buf) { | ||
2831 | name = strncpy(tmp, "//enomem", sizeof(tmp)); | ||
2832 | goto got_name; | ||
2833 | } | ||
2834 | name = d_path(&file->f_path, buf, PATH_MAX); | ||
2835 | if (IS_ERR(name)) { | ||
2836 | name = strncpy(tmp, "//toolong", sizeof(tmp)); | ||
2837 | goto got_name; | ||
2838 | } | ||
2839 | } else { | ||
2840 | name = arch_vma_name(mmap_event->vma); | ||
2841 | if (name) | ||
2842 | goto got_name; | ||
2843 | |||
2844 | if (!vma->vm_mm) { | ||
2845 | name = strncpy(tmp, "[vdso]", sizeof(tmp)); | ||
2846 | goto got_name; | ||
2847 | } | ||
2848 | |||
2849 | name = strncpy(tmp, "//anon", sizeof(tmp)); | ||
2850 | goto got_name; | ||
2851 | } | ||
2852 | |||
2853 | got_name: | ||
2854 | size = ALIGN(strlen(name)+1, sizeof(u64)); | ||
2855 | |||
2856 | mmap_event->file_name = name; | ||
2857 | mmap_event->file_size = size; | ||
2858 | |||
2859 | mmap_event->event.header.size = sizeof(mmap_event->event) + size; | ||
2860 | |||
2861 | cpuctx = &get_cpu_var(perf_cpu_context); | ||
2862 | perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); | ||
2863 | put_cpu_var(perf_cpu_context); | ||
2864 | |||
2865 | rcu_read_lock(); | ||
2866 | /* | ||
2867 | * doesn't really matter which of the child contexts the | ||
2868 | * events ends up in. | ||
2869 | */ | ||
2870 | ctx = rcu_dereference(current->perf_counter_ctxp); | ||
2871 | if (ctx) | ||
2872 | perf_counter_mmap_ctx(ctx, mmap_event); | ||
2873 | rcu_read_unlock(); | ||
2874 | |||
2875 | kfree(buf); | ||
2876 | } | ||
2877 | |||
2878 | void __perf_counter_mmap(struct vm_area_struct *vma) | ||
2879 | { | ||
2880 | struct perf_mmap_event mmap_event; | ||
2881 | |||
2882 | if (!atomic_read(&nr_mmap_counters)) | ||
2883 | return; | ||
2884 | |||
2885 | mmap_event = (struct perf_mmap_event){ | ||
2886 | .vma = vma, | ||
2887 | .event = { | ||
2888 | .header = { .type = PERF_EVENT_MMAP, }, | ||
2889 | .start = vma->vm_start, | ||
2890 | .len = vma->vm_end - vma->vm_start, | ||
2891 | .pgoff = vma->vm_pgoff, | ||
2892 | }, | ||
2893 | }; | ||
2894 | |||
2895 | perf_counter_mmap_event(&mmap_event); | ||
2896 | } | ||
2897 | |||
2898 | /* | ||
2899 | * Log sample_period changes so that analyzing tools can re-normalize the | ||
2900 | * event flow. | ||
2901 | */ | ||
2902 | |||
2903 | struct freq_event { | ||
2904 | struct perf_event_header header; | ||
2905 | u64 time; | ||
2906 | u64 id; | ||
2907 | u64 period; | ||
2908 | }; | ||
2909 | |||
2910 | static void perf_log_period(struct perf_counter *counter, u64 period) | ||
2911 | { | ||
2912 | struct perf_output_handle handle; | ||
2913 | struct freq_event event; | ||
2914 | int ret; | ||
2915 | |||
2916 | if (counter->hw.sample_period == period) | ||
2917 | return; | ||
2918 | |||
2919 | if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) | ||
2920 | return; | ||
2921 | |||
2922 | event = (struct freq_event) { | ||
2923 | .header = { | ||
2924 | .type = PERF_EVENT_PERIOD, | ||
2925 | .misc = 0, | ||
2926 | .size = sizeof(event), | ||
2927 | }, | ||
2928 | .time = sched_clock(), | ||
2929 | .id = counter->id, | ||
2930 | .period = period, | ||
2931 | }; | ||
2932 | |||
2933 | ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); | ||
2934 | if (ret) | ||
2935 | return; | ||
2936 | |||
2937 | perf_output_put(&handle, event); | ||
2938 | perf_output_end(&handle); | ||
2939 | } | ||
2940 | |||
2941 | /* | ||
2942 | * IRQ throttle logging | ||
2943 | */ | ||
2944 | |||
2945 | static void perf_log_throttle(struct perf_counter *counter, int enable) | ||
2946 | { | ||
2947 | struct perf_output_handle handle; | ||
2948 | int ret; | ||
2949 | |||
2950 | struct { | ||
2951 | struct perf_event_header header; | ||
2952 | u64 time; | ||
2953 | u64 id; | ||
2954 | } throttle_event = { | ||
2955 | .header = { | ||
2956 | .type = PERF_EVENT_THROTTLE + 1, | ||
2957 | .misc = 0, | ||
2958 | .size = sizeof(throttle_event), | ||
2959 | }, | ||
2960 | .time = sched_clock(), | ||
2961 | .id = counter->id, | ||
2962 | }; | ||
2963 | |||
2964 | ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); | ||
2965 | if (ret) | ||
2966 | return; | ||
2967 | |||
2968 | perf_output_put(&handle, throttle_event); | ||
2969 | perf_output_end(&handle); | ||
2970 | } | ||
2971 | |||
2972 | /* | ||
2973 | * Generic counter overflow handling. | ||
2974 | */ | ||
2975 | |||
2976 | int perf_counter_overflow(struct perf_counter *counter, int nmi, | ||
2977 | struct perf_sample_data *data) | ||
2978 | { | ||
2979 | int events = atomic_read(&counter->event_limit); | ||
2980 | int throttle = counter->pmu->unthrottle != NULL; | ||
2981 | struct hw_perf_counter *hwc = &counter->hw; | ||
2982 | int ret = 0; | ||
2983 | |||
2984 | if (!throttle) { | ||
2985 | hwc->interrupts++; | ||
2986 | } else { | ||
2987 | if (hwc->interrupts != MAX_INTERRUPTS) { | ||
2988 | hwc->interrupts++; | ||
2989 | if (HZ * hwc->interrupts > | ||
2990 | (u64)sysctl_perf_counter_sample_rate) { | ||
2991 | hwc->interrupts = MAX_INTERRUPTS; | ||
2992 | perf_log_throttle(counter, 0); | ||
2993 | ret = 1; | ||
2994 | } | ||
2995 | } else { | ||
2996 | /* | ||
2997 | * Keep re-disabling counters even though on the previous | ||
2998 | * pass we disabled it - just in case we raced with a | ||
2999 | * sched-in and the counter got enabled again: | ||
3000 | */ | ||
3001 | ret = 1; | ||
3002 | } | ||
3003 | } | ||
3004 | |||
3005 | if (counter->attr.freq) { | ||
3006 | u64 now = sched_clock(); | ||
3007 | s64 delta = now - hwc->freq_stamp; | ||
3008 | |||
3009 | hwc->freq_stamp = now; | ||
3010 | |||
3011 | if (delta > 0 && delta < TICK_NSEC) | ||
3012 | perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); | ||
3013 | } | ||
3014 | |||
3015 | /* | ||
3016 | * XXX event_limit might not quite work as expected on inherited | ||
3017 | * counters | ||
3018 | */ | ||
3019 | |||
3020 | counter->pending_kill = POLL_IN; | ||
3021 | if (events && atomic_dec_and_test(&counter->event_limit)) { | ||
3022 | ret = 1; | ||
3023 | counter->pending_kill = POLL_HUP; | ||
3024 | if (nmi) { | ||
3025 | counter->pending_disable = 1; | ||
3026 | perf_pending_queue(&counter->pending, | ||
3027 | perf_pending_counter); | ||
3028 | } else | ||
3029 | perf_counter_disable(counter); | ||
3030 | } | ||
3031 | |||
3032 | perf_counter_output(counter, nmi, data); | ||
3033 | return ret; | ||
3034 | } | ||
3035 | |||
3036 | /* | ||
3037 | * Generic software counter infrastructure | ||
3038 | */ | ||
3039 | |||
3040 | static void perf_swcounter_update(struct perf_counter *counter) | ||
3041 | { | ||
3042 | struct hw_perf_counter *hwc = &counter->hw; | ||
3043 | u64 prev, now; | ||
3044 | s64 delta; | ||
3045 | |||
3046 | again: | ||
3047 | prev = atomic64_read(&hwc->prev_count); | ||
3048 | now = atomic64_read(&hwc->count); | ||
3049 | if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) | ||
3050 | goto again; | ||
3051 | |||
3052 | delta = now - prev; | ||
3053 | |||
3054 | atomic64_add(delta, &counter->count); | ||
3055 | atomic64_sub(delta, &hwc->period_left); | ||
3056 | } | ||
3057 | |||
3058 | static void perf_swcounter_set_period(struct perf_counter *counter) | ||
3059 | { | ||
3060 | struct hw_perf_counter *hwc = &counter->hw; | ||
3061 | s64 left = atomic64_read(&hwc->period_left); | ||
3062 | s64 period = hwc->sample_period; | ||
3063 | |||
3064 | if (unlikely(left <= -period)) { | ||
3065 | left = period; | ||
3066 | atomic64_set(&hwc->period_left, left); | ||
3067 | hwc->last_period = period; | ||
3068 | } | ||
3069 | |||
3070 | if (unlikely(left <= 0)) { | ||
3071 | left += period; | ||
3072 | atomic64_add(period, &hwc->period_left); | ||
3073 | hwc->last_period = period; | ||
3074 | } | ||
3075 | |||
3076 | atomic64_set(&hwc->prev_count, -left); | ||
3077 | atomic64_set(&hwc->count, -left); | ||
3078 | } | ||
3079 | |||
3080 | static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) | ||
3081 | { | ||
3082 | enum hrtimer_restart ret = HRTIMER_RESTART; | ||
3083 | struct perf_sample_data data; | ||
3084 | struct perf_counter *counter; | ||
3085 | u64 period; | ||
3086 | |||
3087 | counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); | ||
3088 | counter->pmu->read(counter); | ||
3089 | |||
3090 | data.addr = 0; | ||
3091 | data.regs = get_irq_regs(); | ||
3092 | /* | ||
3093 | * In case we exclude kernel IPs or are somehow not in interrupt | ||
3094 | * context, provide the next best thing, the user IP. | ||
3095 | */ | ||
3096 | if ((counter->attr.exclude_kernel || !data.regs) && | ||
3097 | !counter->attr.exclude_user) | ||
3098 | data.regs = task_pt_regs(current); | ||
3099 | |||
3100 | if (data.regs) { | ||
3101 | if (perf_counter_overflow(counter, 0, &data)) | ||
3102 | ret = HRTIMER_NORESTART; | ||
3103 | } | ||
3104 | |||
3105 | period = max_t(u64, 10000, counter->hw.sample_period); | ||
3106 | hrtimer_forward_now(hrtimer, ns_to_ktime(period)); | ||
3107 | |||
3108 | return ret; | ||
3109 | } | ||
3110 | |||
3111 | static void perf_swcounter_overflow(struct perf_counter *counter, | ||
3112 | int nmi, struct pt_regs *regs, u64 addr) | ||
3113 | { | ||
3114 | struct perf_sample_data data = { | ||
3115 | .regs = regs, | ||
3116 | .addr = addr, | ||
3117 | .period = counter->hw.last_period, | ||
3118 | }; | ||
3119 | |||
3120 | perf_swcounter_update(counter); | ||
3121 | perf_swcounter_set_period(counter); | ||
3122 | if (perf_counter_overflow(counter, nmi, &data)) | ||
3123 | /* soft-disable the counter */ | ||
3124 | ; | ||
3125 | |||
3126 | } | ||
3127 | |||
3128 | static int perf_swcounter_is_counting(struct perf_counter *counter) | ||
3129 | { | ||
3130 | struct perf_counter_context *ctx; | ||
3131 | unsigned long flags; | ||
3132 | int count; | ||
3133 | |||
3134 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
3135 | return 1; | ||
3136 | |||
3137 | if (counter->state != PERF_COUNTER_STATE_INACTIVE) | ||
3138 | return 0; | ||
3139 | |||
3140 | /* | ||
3141 | * If the counter is inactive, it could be just because | ||
3142 | * its task is scheduled out, or because it's in a group | ||
3143 | * which could not go on the PMU. We want to count in | ||
3144 | * the first case but not the second. If the context is | ||
3145 | * currently active then an inactive software counter must | ||
3146 | * be the second case. If it's not currently active then | ||
3147 | * we need to know whether the counter was active when the | ||
3148 | * context was last active, which we can determine by | ||
3149 | * comparing counter->tstamp_stopped with ctx->time. | ||
3150 | * | ||
3151 | * We are within an RCU read-side critical section, | ||
3152 | * which protects the existence of *ctx. | ||
3153 | */ | ||
3154 | ctx = counter->ctx; | ||
3155 | spin_lock_irqsave(&ctx->lock, flags); | ||
3156 | count = 1; | ||
3157 | /* Re-check state now we have the lock */ | ||
3158 | if (counter->state < PERF_COUNTER_STATE_INACTIVE || | ||
3159 | counter->ctx->is_active || | ||
3160 | counter->tstamp_stopped < ctx->time) | ||
3161 | count = 0; | ||
3162 | spin_unlock_irqrestore(&ctx->lock, flags); | ||
3163 | return count; | ||
3164 | } | ||
3165 | |||
3166 | static int perf_swcounter_match(struct perf_counter *counter, | ||
3167 | enum perf_type_id type, | ||
3168 | u32 event, struct pt_regs *regs) | ||
3169 | { | ||
3170 | if (!perf_swcounter_is_counting(counter)) | ||
3171 | return 0; | ||
3172 | |||
3173 | if (counter->attr.type != type) | ||
3174 | return 0; | ||
3175 | if (counter->attr.config != event) | ||
3176 | return 0; | ||
3177 | |||
3178 | if (regs) { | ||
3179 | if (counter->attr.exclude_user && user_mode(regs)) | ||
3180 | return 0; | ||
3181 | |||
3182 | if (counter->attr.exclude_kernel && !user_mode(regs)) | ||
3183 | return 0; | ||
3184 | } | ||
3185 | |||
3186 | return 1; | ||
3187 | } | ||
3188 | |||
3189 | static void perf_swcounter_add(struct perf_counter *counter, u64 nr, | ||
3190 | int nmi, struct pt_regs *regs, u64 addr) | ||
3191 | { | ||
3192 | int neg = atomic64_add_negative(nr, &counter->hw.count); | ||
3193 | |||
3194 | if (counter->hw.sample_period && !neg && regs) | ||
3195 | perf_swcounter_overflow(counter, nmi, regs, addr); | ||
3196 | } | ||
3197 | |||
3198 | static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, | ||
3199 | enum perf_type_id type, u32 event, | ||
3200 | u64 nr, int nmi, struct pt_regs *regs, | ||
3201 | u64 addr) | ||
3202 | { | ||
3203 | struct perf_counter *counter; | ||
3204 | |||
3205 | if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) | ||
3206 | return; | ||
3207 | |||
3208 | rcu_read_lock(); | ||
3209 | list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { | ||
3210 | if (perf_swcounter_match(counter, type, event, regs)) | ||
3211 | perf_swcounter_add(counter, nr, nmi, regs, addr); | ||
3212 | } | ||
3213 | rcu_read_unlock(); | ||
3214 | } | ||
3215 | |||
3216 | static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) | ||
3217 | { | ||
3218 | if (in_nmi()) | ||
3219 | return &cpuctx->recursion[3]; | ||
3220 | |||
3221 | if (in_irq()) | ||
3222 | return &cpuctx->recursion[2]; | ||
3223 | |||
3224 | if (in_softirq()) | ||
3225 | return &cpuctx->recursion[1]; | ||
3226 | |||
3227 | return &cpuctx->recursion[0]; | ||
3228 | } | ||
3229 | |||
3230 | static void __perf_swcounter_event(enum perf_type_id type, u32 event, | ||
3231 | u64 nr, int nmi, struct pt_regs *regs, | ||
3232 | u64 addr) | ||
3233 | { | ||
3234 | struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); | ||
3235 | int *recursion = perf_swcounter_recursion_context(cpuctx); | ||
3236 | struct perf_counter_context *ctx; | ||
3237 | |||
3238 | if (*recursion) | ||
3239 | goto out; | ||
3240 | |||
3241 | (*recursion)++; | ||
3242 | barrier(); | ||
3243 | |||
3244 | perf_swcounter_ctx_event(&cpuctx->ctx, type, event, | ||
3245 | nr, nmi, regs, addr); | ||
3246 | rcu_read_lock(); | ||
3247 | /* | ||
3248 | * doesn't really matter which of the child contexts the | ||
3249 | * events ends up in. | ||
3250 | */ | ||
3251 | ctx = rcu_dereference(current->perf_counter_ctxp); | ||
3252 | if (ctx) | ||
3253 | perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); | ||
3254 | rcu_read_unlock(); | ||
3255 | |||
3256 | barrier(); | ||
3257 | (*recursion)--; | ||
3258 | |||
3259 | out: | ||
3260 | put_cpu_var(perf_cpu_context); | ||
3261 | } | ||
3262 | |||
3263 | void | ||
3264 | perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) | ||
3265 | { | ||
3266 | __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); | ||
3267 | } | ||
3268 | |||
/* pmu ->read for generic software counters: fold pending counts. */
static void perf_swcounter_read(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}
3273 | |||
/*
 * pmu ->enable for generic software counters: arm the sample period.
 * Always returns 0.
 */
static int perf_swcounter_enable(struct perf_counter *counter)
{
	perf_swcounter_set_period(counter);

	return 0;
}
3279 | |||
/* pmu ->disable for generic software counters: fold pending counts. */
static void perf_swcounter_disable(struct perf_counter *counter)
{
	perf_swcounter_update(counter);
}
3284 | |||
3285 | static const struct pmu perf_ops_generic = { | ||
3286 | .enable = perf_swcounter_enable, | ||
3287 | .disable = perf_swcounter_disable, | ||
3288 | .read = perf_swcounter_read, | ||
3289 | }; | ||
3290 | |||
3291 | /* | ||
3292 | * Software counter: cpu wall time clock | ||
3293 | */ | ||
3294 | |||
3295 | static void cpu_clock_perf_counter_update(struct perf_counter *counter) | ||
3296 | { | ||
3297 | int cpu = raw_smp_processor_id(); | ||
3298 | s64 prev; | ||
3299 | u64 now; | ||
3300 | |||
3301 | now = cpu_clock(cpu); | ||
3302 | prev = atomic64_read(&counter->hw.prev_count); | ||
3303 | atomic64_set(&counter->hw.prev_count, now); | ||
3304 | atomic64_add(now - prev, &counter->count); | ||
3305 | } | ||
3306 | |||
3307 | static int cpu_clock_perf_counter_enable(struct perf_counter *counter) | ||
3308 | { | ||
3309 | struct hw_perf_counter *hwc = &counter->hw; | ||
3310 | int cpu = raw_smp_processor_id(); | ||
3311 | |||
3312 | atomic64_set(&hwc->prev_count, cpu_clock(cpu)); | ||
3313 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
3314 | hwc->hrtimer.function = perf_swcounter_hrtimer; | ||
3315 | if (hwc->sample_period) { | ||
3316 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3317 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3318 | ns_to_ktime(period), 0, | ||
3319 | HRTIMER_MODE_REL, 0); | ||
3320 | } | ||
3321 | |||
3322 | return 0; | ||
3323 | } | ||
3324 | |||
3325 | static void cpu_clock_perf_counter_disable(struct perf_counter *counter) | ||
3326 | { | ||
3327 | if (counter->hw.sample_period) | ||
3328 | hrtimer_cancel(&counter->hw.hrtimer); | ||
3329 | cpu_clock_perf_counter_update(counter); | ||
3330 | } | ||
3331 | |||
/* pmu ->read for the cpu-clock counter: account the time elapsed so far. */
static void cpu_clock_perf_counter_read(struct perf_counter *counter)
{
	cpu_clock_perf_counter_update(counter);
}
3336 | |||
3337 | static const struct pmu perf_ops_cpu_clock = { | ||
3338 | .enable = cpu_clock_perf_counter_enable, | ||
3339 | .disable = cpu_clock_perf_counter_disable, | ||
3340 | .read = cpu_clock_perf_counter_read, | ||
3341 | }; | ||
3342 | |||
3343 | /* | ||
3344 | * Software counter: task time clock | ||
3345 | */ | ||
3346 | |||
3347 | static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) | ||
3348 | { | ||
3349 | u64 prev; | ||
3350 | s64 delta; | ||
3351 | |||
3352 | prev = atomic64_xchg(&counter->hw.prev_count, now); | ||
3353 | delta = now - prev; | ||
3354 | atomic64_add(delta, &counter->count); | ||
3355 | } | ||
3356 | |||
3357 | static int task_clock_perf_counter_enable(struct perf_counter *counter) | ||
3358 | { | ||
3359 | struct hw_perf_counter *hwc = &counter->hw; | ||
3360 | u64 now; | ||
3361 | |||
3362 | now = counter->ctx->time; | ||
3363 | |||
3364 | atomic64_set(&hwc->prev_count, now); | ||
3365 | hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
3366 | hwc->hrtimer.function = perf_swcounter_hrtimer; | ||
3367 | if (hwc->sample_period) { | ||
3368 | u64 period = max_t(u64, 10000, hwc->sample_period); | ||
3369 | __hrtimer_start_range_ns(&hwc->hrtimer, | ||
3370 | ns_to_ktime(period), 0, | ||
3371 | HRTIMER_MODE_REL, 0); | ||
3372 | } | ||
3373 | |||
3374 | return 0; | ||
3375 | } | ||
3376 | |||
3377 | static void task_clock_perf_counter_disable(struct perf_counter *counter) | ||
3378 | { | ||
3379 | if (counter->hw.sample_period) | ||
3380 | hrtimer_cancel(&counter->hw.hrtimer); | ||
3381 | task_clock_perf_counter_update(counter, counter->ctx->time); | ||
3382 | |||
3383 | } | ||
3384 | |||
3385 | static void task_clock_perf_counter_read(struct perf_counter *counter) | ||
3386 | { | ||
3387 | u64 time; | ||
3388 | |||
3389 | if (!in_nmi()) { | ||
3390 | update_context_time(counter->ctx); | ||
3391 | time = counter->ctx->time; | ||
3392 | } else { | ||
3393 | u64 now = perf_clock(); | ||
3394 | u64 delta = now - counter->ctx->timestamp; | ||
3395 | time = counter->ctx->time + delta; | ||
3396 | } | ||
3397 | |||
3398 | task_clock_perf_counter_update(counter, time); | ||
3399 | } | ||
3400 | |||
3401 | static const struct pmu perf_ops_task_clock = { | ||
3402 | .enable = task_clock_perf_counter_enable, | ||
3403 | .disable = task_clock_perf_counter_disable, | ||
3404 | .read = task_clock_perf_counter_read, | ||
3405 | }; | ||
3406 | |||
3407 | /* | ||
3408 | * Software counter: cpu migrations | ||
3409 | */ | ||
3410 | void perf_counter_task_migration(struct task_struct *task, int cpu) | ||
3411 | { | ||
3412 | struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
3413 | struct perf_counter_context *ctx; | ||
3414 | |||
3415 | perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, | ||
3416 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3417 | 1, 1, NULL, 0); | ||
3418 | |||
3419 | ctx = perf_pin_task_context(task); | ||
3420 | if (ctx) { | ||
3421 | perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, | ||
3422 | PERF_COUNT_SW_CPU_MIGRATIONS, | ||
3423 | 1, 1, NULL, 0); | ||
3424 | perf_unpin_context(ctx); | ||
3425 | } | ||
3426 | } | ||
3427 | |||
3428 | #ifdef CONFIG_EVENT_PROFILE | ||
3429 | void perf_tpcounter_event(int event_id) | ||
3430 | { | ||
3431 | struct pt_regs *regs = get_irq_regs(); | ||
3432 | |||
3433 | if (!regs) | ||
3434 | regs = task_pt_regs(current); | ||
3435 | |||
3436 | __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); | ||
3437 | } | ||
3438 | EXPORT_SYMBOL_GPL(perf_tpcounter_event); | ||
3439 | |||
3440 | extern int ftrace_profile_enable(int); | ||
3441 | extern void ftrace_profile_disable(int); | ||
3442 | |||
3443 | static void tp_perf_counter_destroy(struct perf_counter *counter) | ||
3444 | { | ||
3445 | ftrace_profile_disable(perf_event_id(&counter->attr)); | ||
3446 | } | ||
3447 | |||
3448 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | ||
3449 | { | ||
3450 | int event_id = perf_event_id(&counter->attr); | ||
3451 | int ret; | ||
3452 | |||
3453 | ret = ftrace_profile_enable(event_id); | ||
3454 | if (ret) | ||
3455 | return NULL; | ||
3456 | |||
3457 | counter->destroy = tp_perf_counter_destroy; | ||
3458 | |||
3459 | return &perf_ops_generic; | ||
3460 | } | ||
3461 | #else | ||
3462 | static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) | ||
3463 | { | ||
3464 | return NULL; | ||
3465 | } | ||
3466 | #endif | ||
3467 | |||
3468 | static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) | ||
3469 | { | ||
3470 | const struct pmu *pmu = NULL; | ||
3471 | |||
3472 | /* | ||
3473 | * Software counters (currently) can't in general distinguish | ||
3474 | * between user, kernel and hypervisor events. | ||
3475 | * However, context switches and cpu migrations are considered | ||
3476 | * to be kernel events, and page faults are never hypervisor | ||
3477 | * events. | ||
3478 | */ | ||
3479 | switch (counter->attr.config) { | ||
3480 | case PERF_COUNT_SW_CPU_CLOCK: | ||
3481 | pmu = &perf_ops_cpu_clock; | ||
3482 | |||
3483 | break; | ||
3484 | case PERF_COUNT_SW_TASK_CLOCK: | ||
3485 | /* | ||
3486 | * If the user instantiates this as a per-cpu counter, | ||
3487 | * use the cpu_clock counter instead. | ||
3488 | */ | ||
3489 | if (counter->ctx->task) | ||
3490 | pmu = &perf_ops_task_clock; | ||
3491 | else | ||
3492 | pmu = &perf_ops_cpu_clock; | ||
3493 | |||
3494 | break; | ||
3495 | case PERF_COUNT_SW_PAGE_FAULTS: | ||
3496 | case PERF_COUNT_SW_PAGE_FAULTS_MIN: | ||
3497 | case PERF_COUNT_SW_PAGE_FAULTS_MAJ: | ||
3498 | case PERF_COUNT_SW_CONTEXT_SWITCHES: | ||
3499 | case PERF_COUNT_SW_CPU_MIGRATIONS: | ||
3500 | pmu = &perf_ops_generic; | ||
3501 | break; | ||
3502 | } | ||
3503 | |||
3504 | return pmu; | ||
3505 | } | ||
3506 | |||
3507 | /* | ||
3508 | * Allocate and initialize a counter structure | ||
3509 | */ | ||
3510 | static struct perf_counter * | ||
3511 | perf_counter_alloc(struct perf_counter_attr *attr, | ||
3512 | int cpu, | ||
3513 | struct perf_counter_context *ctx, | ||
3514 | struct perf_counter *group_leader, | ||
3515 | gfp_t gfpflags) | ||
3516 | { | ||
3517 | const struct pmu *pmu; | ||
3518 | struct perf_counter *counter; | ||
3519 | struct hw_perf_counter *hwc; | ||
3520 | long err; | ||
3521 | |||
3522 | counter = kzalloc(sizeof(*counter), gfpflags); | ||
3523 | if (!counter) | ||
3524 | return ERR_PTR(-ENOMEM); | ||
3525 | |||
3526 | /* | ||
3527 | * Single counters are their own group leaders, with an | ||
3528 | * empty sibling list: | ||
3529 | */ | ||
3530 | if (!group_leader) | ||
3531 | group_leader = counter; | ||
3532 | |||
3533 | mutex_init(&counter->child_mutex); | ||
3534 | INIT_LIST_HEAD(&counter->child_list); | ||
3535 | |||
3536 | INIT_LIST_HEAD(&counter->list_entry); | ||
3537 | INIT_LIST_HEAD(&counter->event_entry); | ||
3538 | INIT_LIST_HEAD(&counter->sibling_list); | ||
3539 | init_waitqueue_head(&counter->waitq); | ||
3540 | |||
3541 | mutex_init(&counter->mmap_mutex); | ||
3542 | |||
3543 | counter->cpu = cpu; | ||
3544 | counter->attr = *attr; | ||
3545 | counter->group_leader = group_leader; | ||
3546 | counter->pmu = NULL; | ||
3547 | counter->ctx = ctx; | ||
3548 | counter->oncpu = -1; | ||
3549 | |||
3550 | counter->ns = get_pid_ns(current->nsproxy->pid_ns); | ||
3551 | counter->id = atomic64_inc_return(&perf_counter_id); | ||
3552 | |||
3553 | counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
3554 | |||
3555 | if (attr->disabled) | ||
3556 | counter->state = PERF_COUNTER_STATE_OFF; | ||
3557 | |||
3558 | pmu = NULL; | ||
3559 | |||
3560 | hwc = &counter->hw; | ||
3561 | hwc->sample_period = attr->sample_period; | ||
3562 | if (attr->freq && attr->sample_freq) | ||
3563 | hwc->sample_period = 1; | ||
3564 | |||
3565 | atomic64_set(&hwc->period_left, hwc->sample_period); | ||
3566 | |||
3567 | /* | ||
3568 | * we currently do not support PERF_SAMPLE_GROUP on inherited counters | ||
3569 | */ | ||
3570 | if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) | ||
3571 | goto done; | ||
3572 | |||
3573 | if (attr->type == PERF_TYPE_RAW) { | ||
3574 | pmu = hw_perf_counter_init(counter); | ||
3575 | goto done; | ||
3576 | } | ||
3577 | |||
3578 | switch (attr->type) { | ||
3579 | case PERF_TYPE_HARDWARE: | ||
3580 | case PERF_TYPE_HW_CACHE: | ||
3581 | pmu = hw_perf_counter_init(counter); | ||
3582 | break; | ||
3583 | |||
3584 | case PERF_TYPE_SOFTWARE: | ||
3585 | pmu = sw_perf_counter_init(counter); | ||
3586 | break; | ||
3587 | |||
3588 | case PERF_TYPE_TRACEPOINT: | ||
3589 | pmu = tp_perf_counter_init(counter); | ||
3590 | break; | ||
3591 | } | ||
3592 | done: | ||
3593 | err = 0; | ||
3594 | if (!pmu) | ||
3595 | err = -EINVAL; | ||
3596 | else if (IS_ERR(pmu)) | ||
3597 | err = PTR_ERR(pmu); | ||
3598 | |||
3599 | if (err) { | ||
3600 | if (counter->ns) | ||
3601 | put_pid_ns(counter->ns); | ||
3602 | kfree(counter); | ||
3603 | return ERR_PTR(err); | ||
3604 | } | ||
3605 | |||
3606 | counter->pmu = pmu; | ||
3607 | |||
3608 | atomic_inc(&nr_counters); | ||
3609 | if (counter->attr.mmap) | ||
3610 | atomic_inc(&nr_mmap_counters); | ||
3611 | if (counter->attr.comm) | ||
3612 | atomic_inc(&nr_comm_counters); | ||
3613 | |||
3614 | return counter; | ||
3615 | } | ||
3616 | |||
3617 | /** | ||
3618 | * sys_perf_counter_open - open a performance counter, associate it to a task/cpu | ||
3619 | * | ||
3620 | * @attr_uptr: event type attributes for monitoring/sampling | ||
3621 | * @pid: target pid | ||
3622 | * @cpu: target cpu | ||
3623 | * @group_fd: group leader counter fd | ||
3624 | */ | ||
3625 | SYSCALL_DEFINE5(perf_counter_open, | ||
3626 | const struct perf_counter_attr __user *, attr_uptr, | ||
3627 | pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) | ||
3628 | { | ||
3629 | struct perf_counter *counter, *group_leader; | ||
3630 | struct perf_counter_attr attr; | ||
3631 | struct perf_counter_context *ctx; | ||
3632 | struct file *counter_file = NULL; | ||
3633 | struct file *group_file = NULL; | ||
3634 | int fput_needed = 0; | ||
3635 | int fput_needed2 = 0; | ||
3636 | int ret; | ||
3637 | |||
3638 | /* for future expandability... */ | ||
3639 | if (flags) | ||
3640 | return -EINVAL; | ||
3641 | |||
3642 | if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) | ||
3643 | return -EFAULT; | ||
3644 | |||
3645 | if (!attr.exclude_kernel) { | ||
3646 | if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) | ||
3647 | return -EACCES; | ||
3648 | } | ||
3649 | |||
3650 | if (attr.freq) { | ||
3651 | if (attr.sample_freq > sysctl_perf_counter_sample_rate) | ||
3652 | return -EINVAL; | ||
3653 | } | ||
3654 | |||
3655 | /* | ||
3656 | * Get the target context (task or percpu): | ||
3657 | */ | ||
3658 | ctx = find_get_context(pid, cpu); | ||
3659 | if (IS_ERR(ctx)) | ||
3660 | return PTR_ERR(ctx); | ||
3661 | |||
3662 | /* | ||
3663 | * Look up the group leader (we will attach this counter to it): | ||
3664 | */ | ||
3665 | group_leader = NULL; | ||
3666 | if (group_fd != -1) { | ||
3667 | ret = -EINVAL; | ||
3668 | group_file = fget_light(group_fd, &fput_needed); | ||
3669 | if (!group_file) | ||
3670 | goto err_put_context; | ||
3671 | if (group_file->f_op != &perf_fops) | ||
3672 | goto err_put_context; | ||
3673 | |||
3674 | group_leader = group_file->private_data; | ||
3675 | /* | ||
3676 | * Do not allow a recursive hierarchy (this new sibling | ||
3677 | * becoming part of another group-sibling): | ||
3678 | */ | ||
3679 | if (group_leader->group_leader != group_leader) | ||
3680 | goto err_put_context; | ||
3681 | /* | ||
3682 | * Do not allow to attach to a group in a different | ||
3683 | * task or CPU context: | ||
3684 | */ | ||
3685 | if (group_leader->ctx != ctx) | ||
3686 | goto err_put_context; | ||
3687 | /* | ||
3688 | * Only a group leader can be exclusive or pinned | ||
3689 | */ | ||
3690 | if (attr.exclusive || attr.pinned) | ||
3691 | goto err_put_context; | ||
3692 | } | ||
3693 | |||
3694 | counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, | ||
3695 | GFP_KERNEL); | ||
3696 | ret = PTR_ERR(counter); | ||
3697 | if (IS_ERR(counter)) | ||
3698 | goto err_put_context; | ||
3699 | |||
3700 | ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); | ||
3701 | if (ret < 0) | ||
3702 | goto err_free_put_context; | ||
3703 | |||
3704 | counter_file = fget_light(ret, &fput_needed2); | ||
3705 | if (!counter_file) | ||
3706 | goto err_free_put_context; | ||
3707 | |||
3708 | counter->filp = counter_file; | ||
3709 | WARN_ON_ONCE(ctx->parent_ctx); | ||
3710 | mutex_lock(&ctx->mutex); | ||
3711 | perf_install_in_context(ctx, counter, cpu); | ||
3712 | ++ctx->generation; | ||
3713 | mutex_unlock(&ctx->mutex); | ||
3714 | |||
3715 | counter->owner = current; | ||
3716 | get_task_struct(current); | ||
3717 | mutex_lock(¤t->perf_counter_mutex); | ||
3718 | list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); | ||
3719 | mutex_unlock(¤t->perf_counter_mutex); | ||
3720 | |||
3721 | fput_light(counter_file, fput_needed2); | ||
3722 | |||
3723 | out_fput: | ||
3724 | fput_light(group_file, fput_needed); | ||
3725 | |||
3726 | return ret; | ||
3727 | |||
3728 | err_free_put_context: | ||
3729 | kfree(counter); | ||
3730 | |||
3731 | err_put_context: | ||
3732 | put_ctx(ctx); | ||
3733 | |||
3734 | goto out_fput; | ||
3735 | } | ||
3736 | |||
3737 | /* | ||
3738 | * inherit a counter from parent task to child task: | ||
3739 | */ | ||
3740 | static struct perf_counter * | ||
3741 | inherit_counter(struct perf_counter *parent_counter, | ||
3742 | struct task_struct *parent, | ||
3743 | struct perf_counter_context *parent_ctx, | ||
3744 | struct task_struct *child, | ||
3745 | struct perf_counter *group_leader, | ||
3746 | struct perf_counter_context *child_ctx) | ||
3747 | { | ||
3748 | struct perf_counter *child_counter; | ||
3749 | |||
3750 | /* | ||
3751 | * Instead of creating recursive hierarchies of counters, | ||
3752 | * we link inherited counters back to the original parent, | ||
3753 | * which has a filp for sure, which we use as the reference | ||
3754 | * count: | ||
3755 | */ | ||
3756 | if (parent_counter->parent) | ||
3757 | parent_counter = parent_counter->parent; | ||
3758 | |||
3759 | child_counter = perf_counter_alloc(&parent_counter->attr, | ||
3760 | parent_counter->cpu, child_ctx, | ||
3761 | group_leader, GFP_KERNEL); | ||
3762 | if (IS_ERR(child_counter)) | ||
3763 | return child_counter; | ||
3764 | get_ctx(child_ctx); | ||
3765 | |||
3766 | /* | ||
3767 | * Make the child state follow the state of the parent counter, | ||
3768 | * not its attr.disabled bit. We hold the parent's mutex, | ||
3769 | * so we won't race with perf_counter_{en, dis}able_family. | ||
3770 | */ | ||
3771 | if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) | ||
3772 | child_counter->state = PERF_COUNTER_STATE_INACTIVE; | ||
3773 | else | ||
3774 | child_counter->state = PERF_COUNTER_STATE_OFF; | ||
3775 | |||
3776 | if (parent_counter->attr.freq) | ||
3777 | child_counter->hw.sample_period = parent_counter->hw.sample_period; | ||
3778 | |||
3779 | /* | ||
3780 | * Link it up in the child's context: | ||
3781 | */ | ||
3782 | add_counter_to_ctx(child_counter, child_ctx); | ||
3783 | |||
3784 | child_counter->parent = parent_counter; | ||
3785 | /* | ||
3786 | * inherit into child's child as well: | ||
3787 | */ | ||
3788 | child_counter->attr.inherit = 1; | ||
3789 | |||
3790 | /* | ||
3791 | * Get a reference to the parent filp - we will fput it | ||
3792 | * when the child counter exits. This is safe to do because | ||
3793 | * we are in the parent and we know that the filp still | ||
3794 | * exists and has a nonzero count: | ||
3795 | */ | ||
3796 | atomic_long_inc(&parent_counter->filp->f_count); | ||
3797 | |||
3798 | /* | ||
3799 | * Link this into the parent counter's child list | ||
3800 | */ | ||
3801 | WARN_ON_ONCE(parent_counter->ctx->parent_ctx); | ||
3802 | mutex_lock(&parent_counter->child_mutex); | ||
3803 | list_add_tail(&child_counter->child_list, &parent_counter->child_list); | ||
3804 | mutex_unlock(&parent_counter->child_mutex); | ||
3805 | |||
3806 | return child_counter; | ||
3807 | } | ||
3808 | |||
3809 | static int inherit_group(struct perf_counter *parent_counter, | ||
3810 | struct task_struct *parent, | ||
3811 | struct perf_counter_context *parent_ctx, | ||
3812 | struct task_struct *child, | ||
3813 | struct perf_counter_context *child_ctx) | ||
3814 | { | ||
3815 | struct perf_counter *leader; | ||
3816 | struct perf_counter *sub; | ||
3817 | struct perf_counter *child_ctr; | ||
3818 | |||
3819 | leader = inherit_counter(parent_counter, parent, parent_ctx, | ||
3820 | child, NULL, child_ctx); | ||
3821 | if (IS_ERR(leader)) | ||
3822 | return PTR_ERR(leader); | ||
3823 | list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { | ||
3824 | child_ctr = inherit_counter(sub, parent, parent_ctx, | ||
3825 | child, leader, child_ctx); | ||
3826 | if (IS_ERR(child_ctr)) | ||
3827 | return PTR_ERR(child_ctr); | ||
3828 | } | ||
3829 | return 0; | ||
3830 | } | ||
3831 | |||
3832 | static void sync_child_counter(struct perf_counter *child_counter, | ||
3833 | struct perf_counter *parent_counter) | ||
3834 | { | ||
3835 | u64 child_val; | ||
3836 | |||
3837 | child_val = atomic64_read(&child_counter->count); | ||
3838 | |||
3839 | /* | ||
3840 | * Add back the child's count to the parent's count: | ||
3841 | */ | ||
3842 | atomic64_add(child_val, &parent_counter->count); | ||
3843 | atomic64_add(child_counter->total_time_enabled, | ||
3844 | &parent_counter->child_total_time_enabled); | ||
3845 | atomic64_add(child_counter->total_time_running, | ||
3846 | &parent_counter->child_total_time_running); | ||
3847 | |||
3848 | /* | ||
3849 | * Remove this counter from the parent's list | ||
3850 | */ | ||
3851 | WARN_ON_ONCE(parent_counter->ctx->parent_ctx); | ||
3852 | mutex_lock(&parent_counter->child_mutex); | ||
3853 | list_del_init(&child_counter->child_list); | ||
3854 | mutex_unlock(&parent_counter->child_mutex); | ||
3855 | |||
3856 | /* | ||
3857 | * Release the parent counter, if this was the last | ||
3858 | * reference to it. | ||
3859 | */ | ||
3860 | fput(parent_counter->filp); | ||
3861 | } | ||
3862 | |||
3863 | static void | ||
3864 | __perf_counter_exit_task(struct perf_counter *child_counter, | ||
3865 | struct perf_counter_context *child_ctx) | ||
3866 | { | ||
3867 | struct perf_counter *parent_counter; | ||
3868 | |||
3869 | update_counter_times(child_counter); | ||
3870 | perf_counter_remove_from_context(child_counter); | ||
3871 | |||
3872 | parent_counter = child_counter->parent; | ||
3873 | /* | ||
3874 | * It can happen that parent exits first, and has counters | ||
3875 | * that are still around due to the child reference. These | ||
3876 | * counters need to be zapped - but otherwise linger. | ||
3877 | */ | ||
3878 | if (parent_counter) { | ||
3879 | sync_child_counter(child_counter, parent_counter); | ||
3880 | free_counter(child_counter); | ||
3881 | } | ||
3882 | } | ||
3883 | |||
3884 | /* | ||
3885 | * When a child task exits, feed back counter values to parent counters. | ||
3886 | */ | ||
3887 | void perf_counter_exit_task(struct task_struct *child) | ||
3888 | { | ||
3889 | struct perf_counter *child_counter, *tmp; | ||
3890 | struct perf_counter_context *child_ctx; | ||
3891 | unsigned long flags; | ||
3892 | |||
3893 | if (likely(!child->perf_counter_ctxp)) | ||
3894 | return; | ||
3895 | |||
3896 | local_irq_save(flags); | ||
3897 | /* | ||
3898 | * We can't reschedule here because interrupts are disabled, | ||
3899 | * and either child is current or it is a task that can't be | ||
3900 | * scheduled, so we are now safe from rescheduling changing | ||
3901 | * our context. | ||
3902 | */ | ||
3903 | child_ctx = child->perf_counter_ctxp; | ||
3904 | __perf_counter_task_sched_out(child_ctx); | ||
3905 | |||
3906 | /* | ||
3907 | * Take the context lock here so that if find_get_context is | ||
3908 | * reading child->perf_counter_ctxp, we wait until it has | ||
3909 | * incremented the context's refcount before we do put_ctx below. | ||
3910 | */ | ||
3911 | spin_lock(&child_ctx->lock); | ||
3912 | child->perf_counter_ctxp = NULL; | ||
3913 | if (child_ctx->parent_ctx) { | ||
3914 | /* | ||
3915 | * This context is a clone; unclone it so it can't get | ||
3916 | * swapped to another process while we're removing all | ||
3917 | * the counters from it. | ||
3918 | */ | ||
3919 | put_ctx(child_ctx->parent_ctx); | ||
3920 | child_ctx->parent_ctx = NULL; | ||
3921 | } | ||
3922 | spin_unlock(&child_ctx->lock); | ||
3923 | local_irq_restore(flags); | ||
3924 | |||
3925 | /* | ||
3926 | * We can recurse on the same lock type through: | ||
3927 | * | ||
3928 | * __perf_counter_exit_task() | ||
3929 | * sync_child_counter() | ||
3930 | * fput(parent_counter->filp) | ||
3931 | * perf_release() | ||
3932 | * mutex_lock(&ctx->mutex) | ||
3933 | * | ||
3934 | * But since its the parent context it won't be the same instance. | ||
3935 | */ | ||
3936 | mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); | ||
3937 | |||
3938 | again: | ||
3939 | list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, | ||
3940 | list_entry) | ||
3941 | __perf_counter_exit_task(child_counter, child_ctx); | ||
3942 | |||
3943 | /* | ||
3944 | * If the last counter was a group counter, it will have appended all | ||
3945 | * its siblings to the list, but we obtained 'tmp' before that which | ||
3946 | * will still point to the list head terminating the iteration. | ||
3947 | */ | ||
3948 | if (!list_empty(&child_ctx->counter_list)) | ||
3949 | goto again; | ||
3950 | |||
3951 | mutex_unlock(&child_ctx->mutex); | ||
3952 | |||
3953 | put_ctx(child_ctx); | ||
3954 | } | ||
3955 | |||
3956 | /* | ||
3957 | * free an unexposed, unused context as created by inheritance by | ||
3958 | * init_task below, used by fork() in case of fail. | ||
3959 | */ | ||
3960 | void perf_counter_free_task(struct task_struct *task) | ||
3961 | { | ||
3962 | struct perf_counter_context *ctx = task->perf_counter_ctxp; | ||
3963 | struct perf_counter *counter, *tmp; | ||
3964 | |||
3965 | if (!ctx) | ||
3966 | return; | ||
3967 | |||
3968 | mutex_lock(&ctx->mutex); | ||
3969 | again: | ||
3970 | list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) { | ||
3971 | struct perf_counter *parent = counter->parent; | ||
3972 | |||
3973 | if (WARN_ON_ONCE(!parent)) | ||
3974 | continue; | ||
3975 | |||
3976 | mutex_lock(&parent->child_mutex); | ||
3977 | list_del_init(&counter->child_list); | ||
3978 | mutex_unlock(&parent->child_mutex); | ||
3979 | |||
3980 | fput(parent->filp); | ||
3981 | |||
3982 | list_del_counter(counter, ctx); | ||
3983 | free_counter(counter); | ||
3984 | } | ||
3985 | |||
3986 | if (!list_empty(&ctx->counter_list)) | ||
3987 | goto again; | ||
3988 | |||
3989 | mutex_unlock(&ctx->mutex); | ||
3990 | |||
3991 | put_ctx(ctx); | ||
3992 | } | ||
3993 | |||
3994 | /* | ||
3995 | * Initialize the perf_counter context in task_struct | ||
3996 | */ | ||
3997 | int perf_counter_init_task(struct task_struct *child) | ||
3998 | { | ||
3999 | struct perf_counter_context *child_ctx, *parent_ctx; | ||
4000 | struct perf_counter_context *cloned_ctx; | ||
4001 | struct perf_counter *counter; | ||
4002 | struct task_struct *parent = current; | ||
4003 | int inherited_all = 1; | ||
4004 | int ret = 0; | ||
4005 | |||
4006 | child->perf_counter_ctxp = NULL; | ||
4007 | |||
4008 | mutex_init(&child->perf_counter_mutex); | ||
4009 | INIT_LIST_HEAD(&child->perf_counter_list); | ||
4010 | |||
4011 | if (likely(!parent->perf_counter_ctxp)) | ||
4012 | return 0; | ||
4013 | |||
4014 | /* | ||
4015 | * This is executed from the parent task context, so inherit | ||
4016 | * counters that have been marked for cloning. | ||
4017 | * First allocate and initialize a context for the child. | ||
4018 | */ | ||
4019 | |||
4020 | child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); | ||
4021 | if (!child_ctx) | ||
4022 | return -ENOMEM; | ||
4023 | |||
4024 | __perf_counter_init_context(child_ctx, child); | ||
4025 | child->perf_counter_ctxp = child_ctx; | ||
4026 | get_task_struct(child); | ||
4027 | |||
4028 | /* | ||
4029 | * If the parent's context is a clone, pin it so it won't get | ||
4030 | * swapped under us. | ||
4031 | */ | ||
4032 | parent_ctx = perf_pin_task_context(parent); | ||
4033 | |||
4034 | /* | ||
4035 | * No need to check if parent_ctx != NULL here; since we saw | ||
4036 | * it non-NULL earlier, the only reason for it to become NULL | ||
4037 | * is if we exit, and since we're currently in the middle of | ||
4038 | * a fork we can't be exiting at the same time. | ||
4039 | */ | ||
4040 | |||
4041 | /* | ||
4042 | * Lock the parent list. No need to lock the child - not PID | ||
4043 | * hashed yet and not running, so nobody can access it. | ||
4044 | */ | ||
4045 | mutex_lock(&parent_ctx->mutex); | ||
4046 | |||
4047 | /* | ||
4048 | * We dont have to disable NMIs - we are only looking at | ||
4049 | * the list, not manipulating it: | ||
4050 | */ | ||
4051 | list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { | ||
4052 | if (counter != counter->group_leader) | ||
4053 | continue; | ||
4054 | |||
4055 | if (!counter->attr.inherit) { | ||
4056 | inherited_all = 0; | ||
4057 | continue; | ||
4058 | } | ||
4059 | |||
4060 | ret = inherit_group(counter, parent, parent_ctx, | ||
4061 | child, child_ctx); | ||
4062 | if (ret) { | ||
4063 | inherited_all = 0; | ||
4064 | break; | ||
4065 | } | ||
4066 | } | ||
4067 | |||
4068 | if (inherited_all) { | ||
4069 | /* | ||
4070 | * Mark the child context as a clone of the parent | ||
4071 | * context, or of whatever the parent is a clone of. | ||
4072 | * Note that if the parent is a clone, it could get | ||
4073 | * uncloned at any point, but that doesn't matter | ||
4074 | * because the list of counters and the generation | ||
4075 | * count can't have changed since we took the mutex. | ||
4076 | */ | ||
4077 | cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); | ||
4078 | if (cloned_ctx) { | ||
4079 | child_ctx->parent_ctx = cloned_ctx; | ||
4080 | child_ctx->parent_gen = parent_ctx->parent_gen; | ||
4081 | } else { | ||
4082 | child_ctx->parent_ctx = parent_ctx; | ||
4083 | child_ctx->parent_gen = parent_ctx->generation; | ||
4084 | } | ||
4085 | get_ctx(child_ctx->parent_ctx); | ||
4086 | } | ||
4087 | |||
4088 | mutex_unlock(&parent_ctx->mutex); | ||
4089 | |||
4090 | perf_unpin_context(parent_ctx); | ||
4091 | |||
4092 | return ret; | ||
4093 | } | ||
4094 | |||
4095 | static void __cpuinit perf_counter_init_cpu(int cpu) | ||
4096 | { | ||
4097 | struct perf_cpu_context *cpuctx; | ||
4098 | |||
4099 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
4100 | __perf_counter_init_context(&cpuctx->ctx, NULL); | ||
4101 | |||
4102 | spin_lock(&perf_resource_lock); | ||
4103 | cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; | ||
4104 | spin_unlock(&perf_resource_lock); | ||
4105 | |||
4106 | hw_perf_counter_setup(cpu); | ||
4107 | } | ||
4108 | |||
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove every counter from the per-cpu context of the cpu this function
 * executes on. @info is unused.
 */
static void __perf_counter_exit_cpu(void *info)
{
	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
	struct perf_counter_context *ctx = &cpuctx->ctx;
	struct perf_counter *counter, *next;

	list_for_each_entry_safe(counter, next, &ctx->counter_list, list_entry)
		__perf_counter_remove_from_context(counter);
}

/*
 * Empty the per-cpu counter context of @cpu by running the removal on
 * that cpu, with the context mutex held around the cross-call.
 */
static void perf_counter_exit_cpu(int cpu)
{
	struct perf_counter_context *ctx = &per_cpu(perf_cpu_context, cpu).ctx;

	mutex_lock(&ctx->mutex);
	smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
	mutex_unlock(&ctx->mutex);
}
#else
/* Without cpu hotplug there is nothing to tear down. */
static inline void perf_counter_exit_cpu(int cpu) { }
#endif
4131 | |||
4132 | static int __cpuinit | ||
4133 | perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) | ||
4134 | { | ||
4135 | unsigned int cpu = (long)hcpu; | ||
4136 | |||
4137 | switch (action) { | ||
4138 | |||
4139 | case CPU_UP_PREPARE: | ||
4140 | case CPU_UP_PREPARE_FROZEN: | ||
4141 | perf_counter_init_cpu(cpu); | ||
4142 | break; | ||
4143 | |||
4144 | case CPU_DOWN_PREPARE: | ||
4145 | case CPU_DOWN_PREPARE_FROZEN: | ||
4146 | perf_counter_exit_cpu(cpu); | ||
4147 | break; | ||
4148 | |||
4149 | default: | ||
4150 | break; | ||
4151 | } | ||
4152 | |||
4153 | return NOTIFY_OK; | ||
4154 | } | ||
4155 | |||
4156 | /* | ||
4157 | * This has to have a higher priority than migration_notifier in sched.c. | ||
4158 | */ | ||
4159 | static struct notifier_block __cpuinitdata perf_cpu_nb = { | ||
4160 | .notifier_call = perf_cpu_notify, | ||
4161 | .priority = 20, | ||
4162 | }; | ||
4163 | |||
4164 | void __init perf_counter_init(void) | ||
4165 | { | ||
4166 | perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, | ||
4167 | (void *)(long)smp_processor_id()); | ||
4168 | register_cpu_notifier(&perf_cpu_nb); | ||
4169 | } | ||
4170 | |||
4171 | static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) | ||
4172 | { | ||
4173 | return sprintf(buf, "%d\n", perf_reserved_percpu); | ||
4174 | } | ||
4175 | |||
4176 | static ssize_t | ||
4177 | perf_set_reserve_percpu(struct sysdev_class *class, | ||
4178 | const char *buf, | ||
4179 | size_t count) | ||
4180 | { | ||
4181 | struct perf_cpu_context *cpuctx; | ||
4182 | unsigned long val; | ||
4183 | int err, cpu, mpt; | ||
4184 | |||
4185 | err = strict_strtoul(buf, 10, &val); | ||
4186 | if (err) | ||
4187 | return err; | ||
4188 | if (val > perf_max_counters) | ||
4189 | return -EINVAL; | ||
4190 | |||
4191 | spin_lock(&perf_resource_lock); | ||
4192 | perf_reserved_percpu = val; | ||
4193 | for_each_online_cpu(cpu) { | ||
4194 | cpuctx = &per_cpu(perf_cpu_context, cpu); | ||
4195 | spin_lock_irq(&cpuctx->ctx.lock); | ||
4196 | mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, | ||
4197 | perf_max_counters - perf_reserved_percpu); | ||
4198 | cpuctx->max_pertask = mpt; | ||
4199 | spin_unlock_irq(&cpuctx->ctx.lock); | ||
4200 | } | ||
4201 | spin_unlock(&perf_resource_lock); | ||
4202 | |||
4203 | return count; | ||
4204 | } | ||
4205 | |||
4206 | static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) | ||
4207 | { | ||
4208 | return sprintf(buf, "%d\n", perf_overcommit); | ||
4209 | } | ||
4210 | |||
4211 | static ssize_t | ||
4212 | perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) | ||
4213 | { | ||
4214 | unsigned long val; | ||
4215 | int err; | ||
4216 | |||
4217 | err = strict_strtoul(buf, 10, &val); | ||
4218 | if (err) | ||
4219 | return err; | ||
4220 | if (val > 1) | ||
4221 | return -EINVAL; | ||
4222 | |||
4223 | spin_lock(&perf_resource_lock); | ||
4224 | perf_overcommit = val; | ||
4225 | spin_unlock(&perf_resource_lock); | ||
4226 | |||
4227 | return count; | ||
4228 | } | ||
4229 | |||
4230 | static SYSDEV_CLASS_ATTR( | ||
4231 | reserve_percpu, | ||
4232 | 0644, | ||
4233 | perf_show_reserve_percpu, | ||
4234 | perf_set_reserve_percpu | ||
4235 | ); | ||
4236 | |||
4237 | static SYSDEV_CLASS_ATTR( | ||
4238 | overcommit, | ||
4239 | 0644, | ||
4240 | perf_show_overcommit, | ||
4241 | perf_set_overcommit | ||
4242 | ); | ||
4243 | |||
4244 | static struct attribute *perfclass_attrs[] = { | ||
4245 | &attr_reserve_percpu.attr, | ||
4246 | &attr_overcommit.attr, | ||
4247 | NULL | ||
4248 | }; | ||
4249 | |||
4250 | static struct attribute_group perfclass_attr_group = { | ||
4251 | .attrs = perfclass_attrs, | ||
4252 | .name = "perf_counters", | ||
4253 | }; | ||
4254 | |||
4255 | static int __init perf_counter_sysfs_init(void) | ||
4256 | { | ||
4257 | return sysfs_create_group(&cpu_sysdev_class.kset.kobj, | ||
4258 | &perfclass_attr_group); | ||
4259 | } | ||
4260 | device_initcall(perf_counter_sysfs_init); | ||
diff --git a/kernel/profile.c b/kernel/profile.c index 7724e0409bae..28cf26ad2d24 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
@@ -111,12 +111,6 @@ int __ref profile_init(void) | |||
111 | /* only text is profiled */ | 111 | /* only text is profiled */ |
112 | prof_len = (_etext - _stext) >> prof_shift; | 112 | prof_len = (_etext - _stext) >> prof_shift; |
113 | buffer_bytes = prof_len*sizeof(atomic_t); | 113 | buffer_bytes = prof_len*sizeof(atomic_t); |
114 | if (!slab_is_available()) { | ||
115 | prof_buffer = alloc_bootmem(buffer_bytes); | ||
116 | alloc_bootmem_cpumask_var(&prof_cpu_mask); | ||
117 | cpumask_copy(prof_cpu_mask, cpu_possible_mask); | ||
118 | return 0; | ||
119 | } | ||
120 | 114 | ||
121 | if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) | 115 | if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) |
122 | return -ENOMEM; | 116 | return -ENOMEM; |
diff --git a/kernel/sched.c b/kernel/sched.c index 14c447ae5d53..f04aa9664504 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/completion.h> | 39 | #include <linux/completion.h> |
40 | #include <linux/kernel_stat.h> | 40 | #include <linux/kernel_stat.h> |
41 | #include <linux/debug_locks.h> | 41 | #include <linux/debug_locks.h> |
42 | #include <linux/perf_counter.h> | ||
42 | #include <linux/security.h> | 43 | #include <linux/security.h> |
43 | #include <linux/notifier.h> | 44 | #include <linux/notifier.h> |
44 | #include <linux/profile.h> | 45 | #include <linux/profile.h> |
@@ -68,7 +69,6 @@ | |||
68 | #include <linux/pagemap.h> | 69 | #include <linux/pagemap.h> |
69 | #include <linux/hrtimer.h> | 70 | #include <linux/hrtimer.h> |
70 | #include <linux/tick.h> | 71 | #include <linux/tick.h> |
71 | #include <linux/bootmem.h> | ||
72 | #include <linux/debugfs.h> | 72 | #include <linux/debugfs.h> |
73 | #include <linux/ctype.h> | 73 | #include <linux/ctype.h> |
74 | #include <linux/ftrace.h> | 74 | #include <linux/ftrace.h> |
@@ -580,6 +580,7 @@ struct rq { | |||
580 | struct load_weight load; | 580 | struct load_weight load; |
581 | unsigned long nr_load_updates; | 581 | unsigned long nr_load_updates; |
582 | u64 nr_switches; | 582 | u64 nr_switches; |
583 | u64 nr_migrations_in; | ||
583 | 584 | ||
584 | struct cfs_rq cfs; | 585 | struct cfs_rq cfs; |
585 | struct rt_rq rt; | 586 | struct rt_rq rt; |
@@ -692,7 +693,7 @@ static inline int cpu_of(struct rq *rq) | |||
692 | #define task_rq(p) cpu_rq(task_cpu(p)) | 693 | #define task_rq(p) cpu_rq(task_cpu(p)) |
693 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) | 694 | #define cpu_curr(cpu) (cpu_rq(cpu)->curr) |
694 | 695 | ||
695 | static inline void update_rq_clock(struct rq *rq) | 696 | inline void update_rq_clock(struct rq *rq) |
696 | { | 697 | { |
697 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 698 | rq->clock = sched_clock_cpu(cpu_of(rq)); |
698 | } | 699 | } |
@@ -1969,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
1969 | p->se.sleep_start -= clock_offset; | 1970 | p->se.sleep_start -= clock_offset; |
1970 | if (p->se.block_start) | 1971 | if (p->se.block_start) |
1971 | p->se.block_start -= clock_offset; | 1972 | p->se.block_start -= clock_offset; |
1973 | #endif | ||
1972 | if (old_cpu != new_cpu) { | 1974 | if (old_cpu != new_cpu) { |
1973 | schedstat_inc(p, se.nr_migrations); | 1975 | p->se.nr_migrations++; |
1976 | new_rq->nr_migrations_in++; | ||
1977 | #ifdef CONFIG_SCHEDSTATS | ||
1974 | if (task_hot(p, old_rq->clock, NULL)) | 1978 | if (task_hot(p, old_rq->clock, NULL)) |
1975 | schedstat_inc(p, se.nr_forced2_migrations); | 1979 | schedstat_inc(p, se.nr_forced2_migrations); |
1976 | } | ||
1977 | #endif | 1980 | #endif |
1981 | perf_counter_task_migration(p, new_cpu); | ||
1982 | } | ||
1978 | p->se.vruntime -= old_cfsrq->min_vruntime - | 1983 | p->se.vruntime -= old_cfsrq->min_vruntime - |
1979 | new_cfsrq->min_vruntime; | 1984 | new_cfsrq->min_vruntime; |
1980 | 1985 | ||
@@ -2369,6 +2374,27 @@ static int sched_balance_self(int cpu, int flag) | |||
2369 | 2374 | ||
2370 | #endif /* CONFIG_SMP */ | 2375 | #endif /* CONFIG_SMP */ |
2371 | 2376 | ||
/**
 * task_oncpu_function_call - call a function on the cpu on which a task runs
 * @p:		the task to evaluate
 * @func:	the function to be called
 * @info:	the function call argument
 *
 * Invokes @func with @info on the cpu of @p, but only if @p is currently
 * running; otherwise nothing is called. That cpu may be the current one,
 * in which case the function is simply called directly.
 */
void task_oncpu_function_call(struct task_struct *p,
			      void (*func) (void *info), void *info)
{
	int target_cpu;

	/* Preemption stays off from sampling the cpu until the call is done. */
	preempt_disable();
	target_cpu = task_cpu(p);
	if (task_curr(p))
		smp_call_function_single(target_cpu, func, info, 1);
	preempt_enable();
}
2397 | |||
2372 | /*** | 2398 | /*** |
2373 | * try_to_wake_up - wake up a thread | 2399 | * try_to_wake_up - wake up a thread |
2374 | * @p: the to-be-woken-up thread | 2400 | * @p: the to-be-woken-up thread |
@@ -2536,6 +2562,7 @@ static void __sched_fork(struct task_struct *p) | |||
2536 | p->se.exec_start = 0; | 2562 | p->se.exec_start = 0; |
2537 | p->se.sum_exec_runtime = 0; | 2563 | p->se.sum_exec_runtime = 0; |
2538 | p->se.prev_sum_exec_runtime = 0; | 2564 | p->se.prev_sum_exec_runtime = 0; |
2565 | p->se.nr_migrations = 0; | ||
2539 | p->se.last_wakeup = 0; | 2566 | p->se.last_wakeup = 0; |
2540 | p->se.avg_overlap = 0; | 2567 | p->se.avg_overlap = 0; |
2541 | p->se.start_runtime = 0; | 2568 | p->se.start_runtime = 0; |
@@ -2766,6 +2793,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
2766 | */ | 2793 | */ |
2767 | prev_state = prev->state; | 2794 | prev_state = prev->state; |
2768 | finish_arch_switch(prev); | 2795 | finish_arch_switch(prev); |
2796 | perf_counter_task_sched_in(current, cpu_of(rq)); | ||
2769 | finish_lock_switch(rq, prev); | 2797 | finish_lock_switch(rq, prev); |
2770 | #ifdef CONFIG_SMP | 2798 | #ifdef CONFIG_SMP |
2771 | if (post_schedule) | 2799 | if (post_schedule) |
@@ -2981,6 +3009,15 @@ static void calc_load_account_active(struct rq *this_rq) | |||
2981 | } | 3009 | } |
2982 | 3010 | ||
2983 | /* | 3011 | /* |
3012 | * Externally visible per-cpu scheduler statistics: | ||
3013 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
3014 | */ | ||
3015 | u64 cpu_nr_migrations(int cpu) | ||
3016 | { | ||
3017 | return cpu_rq(cpu)->nr_migrations_in; | ||
3018 | } | ||
3019 | |||
3020 | /* | ||
2984 | * Update rq->cpu_load[] statistics. This function is usually called every | 3021 | * Update rq->cpu_load[] statistics. This function is usually called every |
2985 | * scheduler tick (TICK_NSEC). | 3022 | * scheduler tick (TICK_NSEC). |
2986 | */ | 3023 | */ |
@@ -5078,6 +5115,8 @@ void scheduler_tick(void) | |||
5078 | curr->sched_class->task_tick(rq, curr, 0); | 5115 | curr->sched_class->task_tick(rq, curr, 0); |
5079 | spin_unlock(&rq->lock); | 5116 | spin_unlock(&rq->lock); |
5080 | 5117 | ||
5118 | perf_counter_task_tick(curr, cpu); | ||
5119 | |||
5081 | #ifdef CONFIG_SMP | 5120 | #ifdef CONFIG_SMP |
5082 | rq->idle_at_tick = idle_cpu(cpu); | 5121 | rq->idle_at_tick = idle_cpu(cpu); |
5083 | trigger_load_balance(rq, cpu); | 5122 | trigger_load_balance(rq, cpu); |
@@ -5293,6 +5332,7 @@ need_resched_nonpreemptible: | |||
5293 | 5332 | ||
5294 | if (likely(prev != next)) { | 5333 | if (likely(prev != next)) { |
5295 | sched_info_switch(prev, next); | 5334 | sched_info_switch(prev, next); |
5335 | perf_counter_task_sched_out(prev, next, cpu); | ||
5296 | 5336 | ||
5297 | rq->nr_switches++; | 5337 | rq->nr_switches++; |
5298 | rq->curr = next; | 5338 | rq->curr = next; |
@@ -7536,8 +7576,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7536 | return NOTIFY_OK; | 7576 | return NOTIFY_OK; |
7537 | } | 7577 | } |
7538 | 7578 | ||
7539 | /* Register at highest priority so that task migration (migrate_all_tasks) | 7579 | /* |
7540 | * happens before everything else. | 7580 | * Register at high priority so that task migration (migrate_all_tasks) |
7581 | * happens before everything else. This has to be lower priority than | ||
7582 | * the notifier in the perf_counter subsystem, though. | ||
7541 | */ | 7583 | */ |
7542 | static struct notifier_block __cpuinitdata migration_notifier = { | 7584 | static struct notifier_block __cpuinitdata migration_notifier = { |
7543 | .notifier_call = migration_call, | 7585 | .notifier_call = migration_call, |
@@ -7782,24 +7824,21 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7782 | 7824 | ||
7783 | static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) | 7825 | static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) |
7784 | { | 7826 | { |
7827 | gfp_t gfp = GFP_KERNEL; | ||
7828 | |||
7785 | memset(rd, 0, sizeof(*rd)); | 7829 | memset(rd, 0, sizeof(*rd)); |
7786 | 7830 | ||
7787 | if (bootmem) { | 7831 | if (bootmem) |
7788 | alloc_bootmem_cpumask_var(&def_root_domain.span); | 7832 | gfp = GFP_NOWAIT; |
7789 | alloc_bootmem_cpumask_var(&def_root_domain.online); | ||
7790 | alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); | ||
7791 | cpupri_init(&rd->cpupri, true); | ||
7792 | return 0; | ||
7793 | } | ||
7794 | 7833 | ||
7795 | if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) | 7834 | if (!alloc_cpumask_var(&rd->span, gfp)) |
7796 | goto out; | 7835 | goto out; |
7797 | if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) | 7836 | if (!alloc_cpumask_var(&rd->online, gfp)) |
7798 | goto free_span; | 7837 | goto free_span; |
7799 | if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) | 7838 | if (!alloc_cpumask_var(&rd->rto_mask, gfp)) |
7800 | goto free_online; | 7839 | goto free_online; |
7801 | 7840 | ||
7802 | if (cpupri_init(&rd->cpupri, false) != 0) | 7841 | if (cpupri_init(&rd->cpupri, bootmem) != 0) |
7803 | goto free_rto_mask; | 7842 | goto free_rto_mask; |
7804 | return 0; | 7843 | return 0; |
7805 | 7844 | ||
@@ -9123,7 +9162,7 @@ void __init sched_init(void) | |||
9123 | * we use alloc_bootmem(). | 9162 | * we use alloc_bootmem(). |
9124 | */ | 9163 | */ |
9125 | if (alloc_size) { | 9164 | if (alloc_size) { |
9126 | ptr = (unsigned long)alloc_bootmem(alloc_size); | 9165 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9127 | 9166 | ||
9128 | #ifdef CONFIG_FAIR_GROUP_SCHED | 9167 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9129 | init_task_group.se = (struct sched_entity **)ptr; | 9168 | init_task_group.se = (struct sched_entity **)ptr; |
@@ -9218,7 +9257,7 @@ void __init sched_init(void) | |||
9218 | * 1024) and two child groups A0 and A1 (of weight 1024 each), | 9257 | * 1024) and two child groups A0 and A1 (of weight 1024 each), |
9219 | * then A0's share of the cpu resource is: | 9258 | * then A0's share of the cpu resource is: |
9220 | * | 9259 | * |
9221 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% | 9260 | * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% |
9222 | * | 9261 | * |
9223 | * We achieve this by letting init_task_group's tasks sit | 9262 | * We achieve this by letting init_task_group's tasks sit |
9224 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). | 9263 | * directly in rq->cfs (i.e init_task_group->se[] = NULL). |
@@ -9314,15 +9353,17 @@ void __init sched_init(void) | |||
9314 | current->sched_class = &fair_sched_class; | 9353 | current->sched_class = &fair_sched_class; |
9315 | 9354 | ||
9316 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 9355 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
9317 | alloc_bootmem_cpumask_var(&nohz_cpu_mask); | 9356 | alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
9318 | #ifdef CONFIG_SMP | 9357 | #ifdef CONFIG_SMP |
9319 | #ifdef CONFIG_NO_HZ | 9358 | #ifdef CONFIG_NO_HZ |
9320 | alloc_bootmem_cpumask_var(&nohz.cpu_mask); | 9359 | alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9321 | alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); | 9360 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9322 | #endif | 9361 | #endif |
9323 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | 9362 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
9324 | #endif /* SMP */ | 9363 | #endif /* SMP */ |
9325 | 9364 | ||
9365 | perf_counter_init(); | ||
9366 | |||
9326 | scheduler_running = 1; | 9367 | scheduler_running = 1; |
9327 | } | 9368 | } |
9328 | 9369 | ||
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 344712a5e3ed..7deffc9f0e5f 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
@@ -154,8 +154,12 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
154 | */ | 154 | */ |
155 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) | 155 | int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) |
156 | { | 156 | { |
157 | gfp_t gfp = GFP_KERNEL; | ||
157 | int i; | 158 | int i; |
158 | 159 | ||
160 | if (bootmem) | ||
161 | gfp = GFP_NOWAIT; | ||
162 | |||
159 | memset(cp, 0, sizeof(*cp)); | 163 | memset(cp, 0, sizeof(*cp)); |
160 | 164 | ||
161 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | 165 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { |
@@ -163,9 +167,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) | |||
163 | 167 | ||
164 | spin_lock_init(&vec->lock); | 168 | spin_lock_init(&vec->lock); |
165 | vec->count = 0; | 169 | vec->count = 0; |
166 | if (bootmem) | 170 | if (!zalloc_cpumask_var(&vec->mask, gfp)) |
167 | alloc_bootmem_cpumask_var(&vec->mask); | ||
168 | else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) | ||
169 | goto cleanup; | 171 | goto cleanup; |
170 | } | 172 | } |
171 | 173 | ||
diff --git a/kernel/slow-work.c b/kernel/slow-work.c index b28d19135f43..521ed2004d63 100644 --- a/kernel/slow-work.c +++ b/kernel/slow-work.c | |||
@@ -372,8 +372,8 @@ static int slow_work_thread(void *_data) | |||
372 | vsmax *= atomic_read(&slow_work_thread_count); | 372 | vsmax *= atomic_read(&slow_work_thread_count); |
373 | vsmax /= 100; | 373 | vsmax /= 100; |
374 | 374 | ||
375 | prepare_to_wait(&slow_work_thread_wq, &wait, | 375 | prepare_to_wait_exclusive(&slow_work_thread_wq, &wait, |
376 | TASK_INTERRUPTIBLE); | 376 | TASK_INTERRUPTIBLE); |
377 | if (!freezing(current) && | 377 | if (!freezing(current) && |
378 | !slow_work_threads_should_exit && | 378 | !slow_work_threads_should_exit && |
379 | !slow_work_available(vsmax) && | 379 | !slow_work_available(vsmax) && |
diff --git a/kernel/sys.c b/kernel/sys.c index e7998cf31498..438d99a38c87 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/prctl.h> | 14 | #include <linux/prctl.h> |
15 | #include <linux/highuid.h> | 15 | #include <linux/highuid.h> |
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/perf_counter.h> | ||
17 | #include <linux/resource.h> | 18 | #include <linux/resource.h> |
18 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
19 | #include <linux/kexec.h> | 20 | #include <linux/kexec.h> |
@@ -1793,6 +1794,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
1793 | case PR_SET_TSC: | 1794 | case PR_SET_TSC: |
1794 | error = SET_TSC_CTL(arg2); | 1795 | error = SET_TSC_CTL(arg2); |
1795 | break; | 1796 | break; |
1797 | case PR_TASK_PERF_COUNTERS_DISABLE: | ||
1798 | error = perf_counter_task_disable(); | ||
1799 | break; | ||
1800 | case PR_TASK_PERF_COUNTERS_ENABLE: | ||
1801 | error = perf_counter_task_enable(); | ||
1802 | break; | ||
1796 | case PR_GET_TIMERSLACK: | 1803 | case PR_GET_TIMERSLACK: |
1797 | error = current->timer_slack_ns; | 1804 | error = current->timer_slack_ns; |
1798 | break; | 1805 | break; |
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 27dad2967387..68320f6b07b5 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c | |||
@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime); | |||
175 | cond_syscall(compat_sys_timerfd_gettime); | 175 | cond_syscall(compat_sys_timerfd_gettime); |
176 | cond_syscall(sys_eventfd); | 176 | cond_syscall(sys_eventfd); |
177 | cond_syscall(sys_eventfd2); | 177 | cond_syscall(sys_eventfd2); |
178 | |||
179 | /* performance counters: */ | ||
180 | cond_syscall(sys_perf_counter_open); | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 944ba03cae19..ce664f98e3fb 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -49,6 +49,7 @@ | |||
49 | #include <linux/reboot.h> | 49 | #include <linux/reboot.h> |
50 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
51 | #include <linux/slow-work.h> | 51 | #include <linux/slow-work.h> |
52 | #include <linux/perf_counter.h> | ||
52 | 53 | ||
53 | #include <asm/uaccess.h> | 54 | #include <asm/uaccess.h> |
54 | #include <asm/processor.h> | 55 | #include <asm/processor.h> |
@@ -932,6 +933,32 @@ static struct ctl_table kern_table[] = { | |||
932 | .child = slow_work_sysctls, | 933 | .child = slow_work_sysctls, |
933 | }, | 934 | }, |
934 | #endif | 935 | #endif |
936 | #ifdef CONFIG_PERF_COUNTERS | ||
937 | { | ||
938 | .ctl_name = CTL_UNNUMBERED, | ||
939 | .procname = "perf_counter_paranoid", | ||
940 | .data = &sysctl_perf_counter_paranoid, | ||
941 | .maxlen = sizeof(sysctl_perf_counter_paranoid), | ||
942 | .mode = 0644, | ||
943 | .proc_handler = &proc_dointvec, | ||
944 | }, | ||
945 | { | ||
946 | .ctl_name = CTL_UNNUMBERED, | ||
947 | .procname = "perf_counter_mlock_kb", | ||
948 | .data = &sysctl_perf_counter_mlock, | ||
949 | .maxlen = sizeof(sysctl_perf_counter_mlock), | ||
950 | .mode = 0644, | ||
951 | .proc_handler = &proc_dointvec, | ||
952 | }, | ||
953 | { | ||
954 | .ctl_name = CTL_UNNUMBERED, | ||
955 | .procname = "perf_counter_max_sample_rate", | ||
956 | .data = &sysctl_perf_counter_sample_rate, | ||
957 | .maxlen = sizeof(sysctl_perf_counter_sample_rate), | ||
958 | .mode = 0644, | ||
959 | .proc_handler = &proc_dointvec, | ||
960 | }, | ||
961 | #endif | ||
935 | /* | 962 | /* |
936 | * NOTE: do not add new entries to this table unless you have read | 963 | * NOTE: do not add new entries to this table unless you have read |
937 | * Documentation/sysctl/ctl_unnumbered.txt | 964 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/kernel/timer.c b/kernel/timer.c index a26ed294f938..c01e568935ea 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/tick.h> | 38 | #include <linux/tick.h> |
39 | #include <linux/kallsyms.h> | 39 | #include <linux/kallsyms.h> |
40 | #include <linux/perf_counter.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/unistd.h> | 43 | #include <asm/unistd.h> |
@@ -1129,6 +1130,8 @@ static void run_timer_softirq(struct softirq_action *h) | |||
1129 | { | 1130 | { |
1130 | struct tvec_base *base = __get_cpu_var(tvec_bases); | 1131 | struct tvec_base *base = __get_cpu_var(tvec_bases); |
1131 | 1132 | ||
1133 | perf_counter_do_pending(); | ||
1134 | |||
1132 | hrtimer_run_pending(); | 1135 | hrtimer_run_pending(); |
1133 | 1136 | ||
1134 | if (time_after_eq(jiffies, base->timer_jiffies)) | 1137 | if (time_after_eq(jiffies, base->timer_jiffies)) |
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6cdcf38f2da9..116a35051be6 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -336,6 +336,38 @@ config SLUB_STATS | |||
336 | out which slabs are relevant to a particular load. | 336 | out which slabs are relevant to a particular load. |
337 | Try running: slabinfo -DA | 337 | Try running: slabinfo -DA |
338 | 338 | ||
339 | config DEBUG_KMEMLEAK | ||
340 | bool "Kernel memory leak detector" | ||
341 | depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \ | ||
342 | !MEMORY_HOTPLUG | ||
343 | select DEBUG_SLAB if SLAB | ||
344 | select SLUB_DEBUG if SLUB | ||
345 | select DEBUG_FS if SYSFS | ||
346 | select STACKTRACE if STACKTRACE_SUPPORT | ||
347 | select KALLSYMS | ||
348 | help | ||
349 | Say Y here if you want to enable the memory leak | ||
350 | detector. The memory allocation/freeing is traced in a way | ||
351 | similar to the Boehm's conservative garbage collector, the | ||
352 | difference being that the orphan objects are not freed but | ||
353 | only shown in /sys/kernel/debug/kmemleak. Enabling this | ||
354 | feature will introduce an overhead to memory | ||
355 | allocations. See Documentation/kmemleak.txt for more | ||
356 | details. | ||
357 | |||
358 | In order to access the kmemleak file, debugfs needs to be | ||
359 | mounted (usually at /sys/kernel/debug). | ||
360 | |||
361 | config DEBUG_KMEMLEAK_TEST | ||
362 | tristate "Simple test for the kernel memory leak detector" | ||
363 | depends on DEBUG_KMEMLEAK | ||
364 | help | ||
365 | Say Y or M here to build a test for the kernel memory leak | ||
366 | detector. This option enables a module that explicitly leaks | ||
367 | memory. | ||
368 | |||
369 | If unsure, say N. | ||
370 | |||
339 | config DEBUG_PREEMPT | 371 | config DEBUG_PREEMPT |
340 | bool "Debug preemptible kernel" | 372 | bool "Debug preemptible kernel" |
341 | depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) | 373 | depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) |
diff --git a/lib/cpumask.c b/lib/cpumask.c index eb23aaa0c7b8..7bb4142a502f 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c | |||
@@ -92,15 +92,8 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) | |||
92 | */ | 92 | */ |
93 | bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) | 93 | bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) |
94 | { | 94 | { |
95 | if (likely(slab_is_available())) | 95 | *mask = kmalloc_node(cpumask_size(), flags, node); |
96 | *mask = kmalloc_node(cpumask_size(), flags, node); | 96 | |
97 | else { | ||
98 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
99 | printk(KERN_ERR | ||
100 | "=> alloc_cpumask_var: kmalloc not available!\n"); | ||
101 | #endif | ||
102 | *mask = NULL; | ||
103 | } | ||
104 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 97 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
105 | if (!*mask) { | 98 | if (!*mask) { |
106 | printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); | 99 | printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); |
diff --git a/mm/Makefile b/mm/Makefile index ec73c68b6015..e89acb090b4d 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -38,3 +38,5 @@ obj-$(CONFIG_SMP) += allocpercpu.o | |||
38 | endif | 38 | endif |
39 | obj-$(CONFIG_QUICKLIST) += quicklist.o | 39 | obj-$(CONFIG_QUICKLIST) += quicklist.o |
40 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o | 40 | obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o |
41 | obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o | ||
42 | obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o | ||
diff --git a/mm/bootmem.c b/mm/bootmem.c index daf92713f7de..282df0a09e6f 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -532,6 +532,9 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, | |||
532 | unsigned long size, unsigned long align, | 532 | unsigned long size, unsigned long align, |
533 | unsigned long goal, unsigned long limit) | 533 | unsigned long goal, unsigned long limit) |
534 | { | 534 | { |
535 | if (WARN_ON_ONCE(slab_is_available())) | ||
536 | return kzalloc(size, GFP_NOWAIT); | ||
537 | |||
535 | #ifdef CONFIG_HAVE_ARCH_BOOTMEM | 538 | #ifdef CONFIG_HAVE_ARCH_BOOTMEM |
536 | bootmem_data_t *p_bdata; | 539 | bootmem_data_t *p_bdata; |
537 | 540 | ||
@@ -662,6 +665,9 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | |||
662 | void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | 665 | void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, |
663 | unsigned long align, unsigned long goal) | 666 | unsigned long align, unsigned long goal) |
664 | { | 667 | { |
668 | if (WARN_ON_ONCE(slab_is_available())) | ||
669 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
670 | |||
665 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); | 671 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); |
666 | } | 672 | } |
667 | 673 | ||
@@ -693,6 +699,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, | |||
693 | { | 699 | { |
694 | void *ptr; | 700 | void *ptr; |
695 | 701 | ||
702 | if (WARN_ON_ONCE(slab_is_available())) | ||
703 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
704 | |||
696 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); | 705 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); |
697 | if (ptr) | 706 | if (ptr) |
698 | return ptr; | 707 | return ptr; |
@@ -745,6 +754,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, | |||
745 | void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, | 754 | void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, |
746 | unsigned long align, unsigned long goal) | 755 | unsigned long align, unsigned long goal) |
747 | { | 756 | { |
757 | if (WARN_ON_ONCE(slab_is_available())) | ||
758 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
759 | |||
748 | return ___alloc_bootmem_node(pgdat->bdata, size, align, | 760 | return ___alloc_bootmem_node(pgdat->bdata, size, align, |
749 | goal, ARCH_LOW_ADDRESS_LIMIT); | 761 | goal, ARCH_LOW_ADDRESS_LIMIT); |
750 | } | 762 | } |
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c new file mode 100644 index 000000000000..d5292fc6f523 --- /dev/null +++ b/mm/kmemleak-test.c | |||
@@ -0,0 +1,111 @@ | |||
1 | /* | ||
2 | * mm/kmemleak-test.c | ||
3 | * | ||
4 | * Copyright (C) 2008 ARM Limited | ||
5 | * Written by Catalin Marinas <catalin.marinas@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | |||
21 | #include <linux/init.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/vmalloc.h> | ||
26 | #include <linux/list.h> | ||
27 | #include <linux/percpu.h> | ||
28 | #include <linux/fdtable.h> | ||
29 | |||
30 | #include <linux/kmemleak.h> | ||
31 | |||
32 | struct test_node { | ||
33 | long header[25]; | ||
34 | struct list_head list; | ||
35 | long footer[25]; | ||
36 | }; | ||
37 | |||
38 | static LIST_HEAD(test_list); | ||
39 | static DEFINE_PER_CPU(void *, test_pointer); | ||
40 | |||
41 | /* | ||
42 | * Some very simple testing. This function needs to be extended for | ||
43 | * proper testing. | ||
44 | */ | ||
45 | static int __init kmemleak_test_init(void) | ||
46 | { | ||
47 | struct test_node *elem; | ||
48 | int i; | ||
49 | |||
50 | printk(KERN_INFO "Kmemleak testing\n"); | ||
51 | |||
52 | /* make some orphan objects */ | ||
53 | pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); | ||
54 | pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); | ||
55 | pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); | ||
56 | pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); | ||
57 | pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); | ||
58 | pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); | ||
59 | pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); | ||
60 | pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); | ||
61 | #ifndef CONFIG_MODULES | ||
62 | pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n", | ||
63 | kmem_cache_alloc(files_cachep, GFP_KERNEL)); | ||
64 | pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n", | ||
65 | kmem_cache_alloc(files_cachep, GFP_KERNEL)); | ||
66 | #endif | ||
67 | pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); | ||
68 | pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); | ||
69 | pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); | ||
70 | pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); | ||
71 | pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); | ||
72 | |||
73 | /* | ||
74 | * Add elements to a list. They should only appear as orphan | ||
75 | * after the module is removed. | ||
76 | */ | ||
77 | for (i = 0; i < 10; i++) { | ||
78 | elem = kmalloc(sizeof(*elem), GFP_KERNEL); | ||
79 | pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem); | ||
80 | if (!elem) | ||
81 | return -ENOMEM; | ||
82 | memset(elem, 0, sizeof(*elem)); | ||
83 | INIT_LIST_HEAD(&elem->list); | ||
84 | |||
85 | list_add_tail(&elem->list, &test_list); | ||
86 | } | ||
87 | |||
88 | for_each_possible_cpu(i) { | ||
89 | per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL); | ||
90 | pr_info("kmemleak: kmalloc(129) = %p\n", | ||
91 | per_cpu(test_pointer, i)); | ||
92 | } | ||
93 | |||
94 | return 0; | ||
95 | } | ||
96 | module_init(kmemleak_test_init); | ||
97 | |||
98 | static void __exit kmemleak_test_exit(void) | ||
99 | { | ||
100 | struct test_node *elem, *tmp; | ||
101 | |||
102 | /* | ||
103 | * Remove the list elements without actually freeing the | ||
104 | * memory. | ||
105 | */ | ||
106 | list_for_each_entry_safe(elem, tmp, &test_list, list) | ||
107 | list_del(&elem->list); | ||
108 | } | ||
109 | module_exit(kmemleak_test_exit); | ||
110 | |||
111 | MODULE_LICENSE("GPL"); | ||
diff --git a/mm/kmemleak.c b/mm/kmemleak.c new file mode 100644 index 000000000000..58ec86c9e58a --- /dev/null +++ b/mm/kmemleak.c | |||
@@ -0,0 +1,1498 @@ | |||
1 | /* | ||
2 | * mm/kmemleak.c | ||
3 | * | ||
4 | * Copyright (C) 2008 ARM Limited | ||
5 | * Written by Catalin Marinas <catalin.marinas@arm.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License version 2 as | ||
9 | * published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | * | ||
20 | * | ||
21 | * For more information on the algorithm and kmemleak usage, please see | ||
22 | * Documentation/kmemleak.txt. | ||
23 | * | ||
24 | * Notes on locking | ||
25 | * ---------------- | ||
26 | * | ||
27 | * The following locks and mutexes are used by kmemleak: | ||
28 | * | ||
29 | * - kmemleak_lock (rwlock): protects the object_list modifications and | ||
30 | * accesses to the object_tree_root. The object_list is the main list | ||
31 | * holding the metadata (struct kmemleak_object) for the allocated memory | ||
32 | * blocks. The object_tree_root is a priority search tree used to look-up | ||
33 | * metadata based on a pointer to the corresponding memory block. The | ||
34 | * kmemleak_object structures are added to the object_list and | ||
35 | * object_tree_root in the create_object() function called from the | ||
36 | * kmemleak_alloc() callback and removed in delete_object() called from the | ||
37 | * kmemleak_free() callback | ||
38 | * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to | ||
39 | * the metadata (e.g. count) are protected by this lock. Note that some | ||
40 | * members of this structure may be protected by other means (atomic or | ||
41 | * kmemleak_lock). This lock is also held when scanning the corresponding | ||
42 | * memory block to avoid the kernel freeing it via the kmemleak_free() | ||
43 | * callback. This is less heavyweight than holding a global lock like | ||
44 | * kmemleak_lock during scanning | ||
45 | * - scan_mutex (mutex): ensures that only one thread may scan the memory for | ||
46 | * unreferenced objects at a time. The gray_list contains the objects which | ||
47 | * are already referenced or marked as false positives and need to be | ||
48 | * scanned. This list is only modified during a scanning episode when the | ||
49 | * scan_mutex is held. At the end of a scan, the gray_list is always empty. | ||
50 | * Note that the kmemleak_object.use_count is incremented when an object is | ||
51 | * added to the gray_list and therefore cannot be freed | ||
52 | * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs | ||
53 | * file together with modifications to the memory scanning parameters | ||
54 | * including the scan_thread pointer | ||
55 | * | ||
56 | * The kmemleak_object structures have a use_count incremented or decremented | ||
57 | * using the get_object()/put_object() functions. When the use_count becomes | ||
58 | * 0, this count can no longer be incremented and put_object() schedules the | ||
59 | * kmemleak_object freeing via an RCU callback. All calls to the get_object() | ||
60 | * function must be protected by rcu_read_lock() to avoid accessing a freed | ||
61 | * structure. | ||
62 | */ | ||
63 | |||
64 | #include <linux/init.h> | ||
65 | #include <linux/kernel.h> | ||
66 | #include <linux/list.h> | ||
67 | #include <linux/sched.h> | ||
68 | #include <linux/jiffies.h> | ||
69 | #include <linux/delay.h> | ||
70 | #include <linux/module.h> | ||
71 | #include <linux/kthread.h> | ||
72 | #include <linux/prio_tree.h> | ||
73 | #include <linux/gfp.h> | ||
74 | #include <linux/fs.h> | ||
75 | #include <linux/debugfs.h> | ||
76 | #include <linux/seq_file.h> | ||
77 | #include <linux/cpumask.h> | ||
78 | #include <linux/spinlock.h> | ||
79 | #include <linux/mutex.h> | ||
80 | #include <linux/rcupdate.h> | ||
81 | #include <linux/stacktrace.h> | ||
82 | #include <linux/cache.h> | ||
83 | #include <linux/percpu.h> | ||
84 | #include <linux/hardirq.h> | ||
85 | #include <linux/mmzone.h> | ||
86 | #include <linux/slab.h> | ||
87 | #include <linux/thread_info.h> | ||
88 | #include <linux/err.h> | ||
89 | #include <linux/uaccess.h> | ||
90 | #include <linux/string.h> | ||
91 | #include <linux/nodemask.h> | ||
92 | #include <linux/mm.h> | ||
93 | |||
94 | #include <asm/sections.h> | ||
95 | #include <asm/processor.h> | ||
96 | #include <asm/atomic.h> | ||
97 | |||
98 | #include <linux/kmemleak.h> | ||
99 | |||
100 | /* | ||
101 | * Kmemleak configuration and common defines. | ||
102 | */ | ||
103 | #define MAX_TRACE 16 /* stack trace length */ | ||
104 | #define REPORTS_NR 50 /* maximum number of reported leaks */ | ||
105 | #define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ | ||
106 | #define MSECS_SCAN_YIELD 10 /* CPU yielding period */ | ||
107 | #define SECS_FIRST_SCAN 60 /* delay before the first scan */ | ||
108 | #define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ | ||
109 | |||
110 | #define BYTES_PER_POINTER sizeof(void *) | ||
111 | |||
112 | /* scanning area inside a memory block */ | ||
113 | struct kmemleak_scan_area { | ||
114 | struct hlist_node node; | ||
115 | unsigned long offset; | ||
116 | size_t length; | ||
117 | }; | ||
118 | |||
119 | /* | ||
120 | * Structure holding the metadata for each allocated memory block. | ||
121 | * Modifications to such objects should be made while holding the | ||
122 | * object->lock. Insertions or deletions from object_list, gray_list or | ||
123 | * tree_node are already protected by the corresponding locks or mutex (see | ||
124 | * the notes on locking above). These objects are reference-counted | ||
125 | * (use_count) and freed using the RCU mechanism. | ||
126 | */ | ||
127 | struct kmemleak_object { | ||
128 | spinlock_t lock; | ||
129 | unsigned long flags; /* object status flags */ | ||
130 | struct list_head object_list; | ||
131 | struct list_head gray_list; | ||
132 | struct prio_tree_node tree_node; | ||
133 | struct rcu_head rcu; /* object_list lockless traversal */ | ||
134 | /* object usage count; object freed when use_count == 0 */ | ||
135 | atomic_t use_count; | ||
136 | unsigned long pointer; | ||
137 | size_t size; | ||
138 | /* minimum number of pointers found before it is considered a leak */ | |
139 | int min_count; | ||
140 | /* the total number of pointers found pointing to this object */ | ||
141 | int count; | ||
142 | /* memory ranges to be scanned inside an object (empty for all) */ | ||
143 | struct hlist_head area_list; | ||
144 | unsigned long trace[MAX_TRACE]; | ||
145 | unsigned int trace_len; | ||
146 | unsigned long jiffies; /* creation timestamp */ | ||
147 | pid_t pid; /* pid of the current task */ | ||
148 | char comm[TASK_COMM_LEN]; /* executable name */ | ||
149 | }; | ||
150 | |||
151 | /* flag representing the memory block allocation status */ | ||
152 | #define OBJECT_ALLOCATED (1 << 0) | ||
153 | /* flag set after the first reporting of an unreferenced object */ | |
154 | #define OBJECT_REPORTED (1 << 1) | ||
155 | /* flag set to not scan the object */ | ||
156 | #define OBJECT_NO_SCAN (1 << 2) | ||
157 | |||
158 | /* the list of all allocated objects */ | ||
159 | static LIST_HEAD(object_list); | ||
160 | /* the list of gray-colored objects (see color_gray comment below) */ | ||
161 | static LIST_HEAD(gray_list); | ||
162 | /* prio search tree for object boundaries */ | ||
163 | static struct prio_tree_root object_tree_root; | ||
164 | /* rw_lock protecting the access to object_list and object_tree_root */ | |
165 | static DEFINE_RWLOCK(kmemleak_lock); | ||
166 | |||
167 | /* allocation caches for kmemleak internal data */ | ||
168 | static struct kmem_cache *object_cache; | ||
169 | static struct kmem_cache *scan_area_cache; | ||
170 | |||
171 | /* set if tracing memory operations is enabled */ | ||
172 | static atomic_t kmemleak_enabled = ATOMIC_INIT(0); | ||
173 | /* set in the late_initcall if there were no errors */ | ||
174 | static atomic_t kmemleak_initialized = ATOMIC_INIT(0); | ||
175 | /* enables or disables early logging of the memory operations */ | ||
176 | static atomic_t kmemleak_early_log = ATOMIC_INIT(1); | ||
177 | /* set if a fatal kmemleak error has occurred */ | |
178 | static atomic_t kmemleak_error = ATOMIC_INIT(0); | ||
179 | |||
180 | /* minimum and maximum address that may be valid pointers */ | ||
181 | static unsigned long min_addr = ULONG_MAX; | ||
182 | static unsigned long max_addr; | ||
183 | |||
184 | /* used for yielding the CPU to other tasks during scanning */ | ||
185 | static unsigned long next_scan_yield; | ||
186 | static struct task_struct *scan_thread; | ||
187 | static unsigned long jiffies_scan_yield; | ||
188 | static unsigned long jiffies_min_age; | ||
189 | /* delay between automatic memory scannings */ | ||
190 | static signed long jiffies_scan_wait; | ||
191 | /* enables or disables the task stacks scanning */ | ||
192 | static int kmemleak_stack_scan; | ||
193 | /* mutex protecting the memory scanning */ | ||
194 | static DEFINE_MUTEX(scan_mutex); | ||
195 | /* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ | ||
196 | static DEFINE_MUTEX(kmemleak_mutex); | ||
197 | |||
198 | /* number of leaks reported (for limitation purposes) */ | ||
199 | static int reported_leaks; | ||
200 | |||
201 | /* | ||
202 | * Early object allocation/freeing logging. Kmemleak is initialized after the | |
203 | * kernel allocator. However, both the kernel allocator and kmemleak may | |
204 | * allocate memory blocks which need to be tracked. Kmemleak defines an | |
205 | * arbitrary buffer to hold the allocation/freeing information before it is | ||
206 | * fully initialized. | ||
207 | */ | ||
208 | |||
209 | /* kmemleak operation type for early logging */ | ||
210 | enum { | ||
211 | KMEMLEAK_ALLOC, | ||
212 | KMEMLEAK_FREE, | ||
213 | KMEMLEAK_NOT_LEAK, | ||
214 | KMEMLEAK_IGNORE, | ||
215 | KMEMLEAK_SCAN_AREA, | ||
216 | KMEMLEAK_NO_SCAN | ||
217 | }; | ||
218 | |||
219 | /* | ||
220 | * Structure holding the information passed to kmemleak callbacks during the | ||
221 | * early logging. | ||
222 | */ | ||
223 | struct early_log { | ||
224 | int op_type; /* kmemleak operation type */ | ||
225 | const void *ptr; /* allocated/freed memory block */ | ||
226 | size_t size; /* memory block size */ | ||
227 | int min_count; /* minimum reference count */ | ||
228 | unsigned long offset; /* scan area offset */ | ||
229 | size_t length; /* scan area length */ | ||
230 | }; | ||
231 | |||
232 | /* early logging buffer and current position */ | ||
233 | static struct early_log early_log[200]; | ||
234 | static int crt_early_log; | ||
235 | |||
236 | static void kmemleak_disable(void); | ||
237 | |||
238 | /* | ||
239 | * Print a warning and dump the stack trace. | ||
240 | */ | ||
241 | #define kmemleak_warn(x...) do { \ | ||
242 | pr_warning(x); \ | ||
243 | dump_stack(); \ | ||
244 | } while (0) | ||
245 | |||
246 | /* | ||
247 | * Macro invoked when a serious kmemleak condition occurred and cannot be | ||
248 | * recovered from. Kmemleak will be disabled and further allocation/freeing | ||
249 | * tracing is no longer available. | ||
250 | */ | ||
251 | #define kmemleak_panic(x...) do { \ | ||
252 | kmemleak_warn(x); \ | ||
253 | kmemleak_disable(); \ | ||
254 | } while (0) | ||
255 | |||
256 | /* | ||
257 | * Object colors, encoded with count and min_count: | ||
258 | * - white - orphan object, not enough references to it (count < min_count) | ||
259 | * - gray - not orphan, not marked as false positive (min_count == 0) or | ||
260 | * sufficient references to it (count >= min_count) | ||
261 | * - black - ignore, it doesn't contain references (e.g. text section) | ||
262 | * (min_count == -1). No function defined for this color. | ||
263 | * Newly created objects don't have any color assigned (object->count == -1) | ||
264 | * before the next memory scan when they become white. | ||
265 | */ | ||
266 | static int color_white(const struct kmemleak_object *object) | ||
267 | { | ||
268 | return object->count != -1 && object->count < object->min_count; | ||
269 | } | ||
270 | |||
271 | static int color_gray(const struct kmemleak_object *object) | ||
272 | { | ||
273 | return object->min_count != -1 && object->count >= object->min_count; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * Objects are considered referenced if their color is gray and they have not | ||
278 | * been deleted. | ||
279 | */ | ||
280 | static int referenced_object(struct kmemleak_object *object) | ||
281 | { | ||
282 | return (object->flags & OBJECT_ALLOCATED) && color_gray(object); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * Objects are considered unreferenced only if their color is white, they have | ||
287 | * not be deleted and have a minimum age to avoid false positives caused by | ||
288 | * pointers temporarily stored in CPU registers. | ||
289 | */ | ||
290 | static int unreferenced_object(struct kmemleak_object *object) | ||
291 | { | ||
292 | return (object->flags & OBJECT_ALLOCATED) && color_white(object) && | ||
293 | time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); | ||
294 | } | ||
295 | |||
/*
 * Output helper for the (un)referenced objects information: the text goes to
 * the given seq file or, when seq is NULL, to the kernel log. The
 * print_referenced/print_unreferenced functions built on top of it must be
 * called with the object->lock held.
 */
#define print_helper(seq, x...)	do {		\
	struct seq_file *s = (seq);		\
	if (!s)					\
		pr_info(x);			\
	else					\
		seq_printf(s, x);		\
} while (0)
308 | |||
309 | static void print_referenced(struct kmemleak_object *object) | ||
310 | { | ||
311 | pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n", | ||
312 | object->pointer, object->size); | ||
313 | } | ||
314 | |||
315 | static void print_unreferenced(struct seq_file *seq, | ||
316 | struct kmemleak_object *object) | ||
317 | { | ||
318 | int i; | ||
319 | |||
320 | print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n", | ||
321 | object->pointer, object->size); | ||
322 | print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", | ||
323 | object->comm, object->pid, object->jiffies); | ||
324 | print_helper(seq, " backtrace:\n"); | ||
325 | |||
326 | for (i = 0; i < object->trace_len; i++) { | ||
327 | void *ptr = (void *)object->trace[i]; | ||
328 | print_helper(seq, " [<%p>] %pS\n", ptr, ptr); | ||
329 | } | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Print the kmemleak_object information. This function is used mainly for | ||
334 | * debugging special cases when kmemleak operations. It must be called with | ||
335 | * the object->lock held. | ||
336 | */ | ||
337 | static void dump_object_info(struct kmemleak_object *object) | ||
338 | { | ||
339 | struct stack_trace trace; | ||
340 | |||
341 | trace.nr_entries = object->trace_len; | ||
342 | trace.entries = object->trace; | ||
343 | |||
344 | pr_notice("kmemleak: Object 0x%08lx (size %zu):\n", | ||
345 | object->tree_node.start, object->size); | ||
346 | pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", | ||
347 | object->comm, object->pid, object->jiffies); | ||
348 | pr_notice(" min_count = %d\n", object->min_count); | ||
349 | pr_notice(" count = %d\n", object->count); | ||
350 | pr_notice(" backtrace:\n"); | ||
351 | print_stack_trace(&trace, 4); | ||
352 | } | ||
353 | |||
354 | /* | ||
355 | * Look-up a memory block metadata (kmemleak_object) in the priority search | ||
356 | * tree based on a pointer value. If alias is 0, only values pointing to the | ||
357 | * beginning of the memory block are allowed. The kmemleak_lock must be held | ||
358 | * when calling this function. | ||
359 | */ | ||
360 | static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) | ||
361 | { | ||
362 | struct prio_tree_node *node; | ||
363 | struct prio_tree_iter iter; | ||
364 | struct kmemleak_object *object; | ||
365 | |||
366 | prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr); | ||
367 | node = prio_tree_next(&iter); | ||
368 | if (node) { | ||
369 | object = prio_tree_entry(node, struct kmemleak_object, | ||
370 | tree_node); | ||
371 | if (!alias && object->pointer != ptr) { | ||
372 | kmemleak_warn("kmemleak: Found object by alias"); | ||
373 | object = NULL; | ||
374 | } | ||
375 | } else | ||
376 | object = NULL; | ||
377 | |||
378 | return object; | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Increment the object use_count. Return 1 if successful or 0 otherwise. Note | ||
383 | * that once an object's use_count reached 0, the RCU freeing was already | ||
384 | * registered and the object should no longer be used. This function must be | ||
385 | * called under the protection of rcu_read_lock(). | ||
386 | */ | ||
387 | static int get_object(struct kmemleak_object *object) | ||
388 | { | ||
389 | return atomic_inc_not_zero(&object->use_count); | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * RCU callback to free a kmemleak_object. | ||
394 | */ | ||
395 | static void free_object_rcu(struct rcu_head *rcu) | ||
396 | { | ||
397 | struct hlist_node *elem, *tmp; | ||
398 | struct kmemleak_scan_area *area; | ||
399 | struct kmemleak_object *object = | ||
400 | container_of(rcu, struct kmemleak_object, rcu); | ||
401 | |||
402 | /* | ||
403 | * Once use_count is 0 (guaranteed by put_object), there is no other | ||
404 | * code accessing this object, hence no need for locking. | ||
405 | */ | ||
406 | hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) { | ||
407 | hlist_del(elem); | ||
408 | kmem_cache_free(scan_area_cache, area); | ||
409 | } | ||
410 | kmem_cache_free(object_cache, object); | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Decrement the object use_count. Once the count is 0, free the object using | ||
415 | * an RCU callback. Since put_object() may be called via the kmemleak_free() -> | ||
416 | * delete_object() path, the delayed RCU freeing ensures that there is no | ||
417 | * recursive call to the kernel allocator. Lock-less RCU object_list traversal | ||
418 | * is also possible. | ||
419 | */ | ||
420 | static void put_object(struct kmemleak_object *object) | ||
421 | { | ||
422 | if (!atomic_dec_and_test(&object->use_count)) | ||
423 | return; | ||
424 | |||
425 | /* should only get here after delete_object was called */ | ||
426 | WARN_ON(object->flags & OBJECT_ALLOCATED); | ||
427 | |||
428 | call_rcu(&object->rcu, free_object_rcu); | ||
429 | } | ||
430 | |||
431 | /* | ||
432 | * Look up an object in the prio search tree and increase its use_count. | ||
433 | */ | ||
434 | static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) | ||
435 | { | ||
436 | unsigned long flags; | ||
437 | struct kmemleak_object *object = NULL; | ||
438 | |||
439 | rcu_read_lock(); | ||
440 | read_lock_irqsave(&kmemleak_lock, flags); | ||
441 | if (ptr >= min_addr && ptr < max_addr) | ||
442 | object = lookup_object(ptr, alias); | ||
443 | read_unlock_irqrestore(&kmemleak_lock, flags); | ||
444 | |||
445 | /* check whether the object is still available */ | ||
446 | if (object && !get_object(object)) | ||
447 | object = NULL; | ||
448 | rcu_read_unlock(); | ||
449 | |||
450 | return object; | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Create the metadata (struct kmemleak_object) corresponding to an allocated | ||
455 | * memory block and add it to the object_list and object_tree_root. | ||
456 | */ | ||
457 | static void create_object(unsigned long ptr, size_t size, int min_count, | ||
458 | gfp_t gfp) | ||
459 | { | ||
460 | unsigned long flags; | ||
461 | struct kmemleak_object *object; | ||
462 | struct prio_tree_node *node; | ||
463 | struct stack_trace trace; | ||
464 | |||
465 | object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK); | ||
466 | if (!object) { | ||
467 | kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object " | ||
468 | "structure\n"); | ||
469 | return; | ||
470 | } | ||
471 | |||
472 | INIT_LIST_HEAD(&object->object_list); | ||
473 | INIT_LIST_HEAD(&object->gray_list); | ||
474 | INIT_HLIST_HEAD(&object->area_list); | ||
475 | spin_lock_init(&object->lock); | ||
476 | atomic_set(&object->use_count, 1); | ||
477 | object->flags = OBJECT_ALLOCATED; | ||
478 | object->pointer = ptr; | ||
479 | object->size = size; | ||
480 | object->min_count = min_count; | ||
481 | object->count = -1; /* no color initially */ | ||
482 | object->jiffies = jiffies; | ||
483 | |||
484 | /* task information */ | ||
485 | if (in_irq()) { | ||
486 | object->pid = 0; | ||
487 | strncpy(object->comm, "hardirq", sizeof(object->comm)); | ||
488 | } else if (in_softirq()) { | ||
489 | object->pid = 0; | ||
490 | strncpy(object->comm, "softirq", sizeof(object->comm)); | ||
491 | } else { | ||
492 | object->pid = current->pid; | ||
493 | /* | ||
494 | * There is a small chance of a race with set_task_comm(), | ||
495 | * however using get_task_comm() here may cause locking | ||
496 | * dependency issues with current->alloc_lock. In the worst | ||
497 | * case, the command line is not correct. | ||
498 | */ | ||
499 | strncpy(object->comm, current->comm, sizeof(object->comm)); | ||
500 | } | ||
501 | |||
502 | /* kernel backtrace */ | ||
503 | trace.max_entries = MAX_TRACE; | ||
504 | trace.nr_entries = 0; | ||
505 | trace.entries = object->trace; | ||
506 | trace.skip = 1; | ||
507 | save_stack_trace(&trace); | ||
508 | object->trace_len = trace.nr_entries; | ||
509 | |||
510 | INIT_PRIO_TREE_NODE(&object->tree_node); | ||
511 | object->tree_node.start = ptr; | ||
512 | object->tree_node.last = ptr + size - 1; | ||
513 | |||
514 | write_lock_irqsave(&kmemleak_lock, flags); | ||
515 | min_addr = min(min_addr, ptr); | ||
516 | max_addr = max(max_addr, ptr + size); | ||
517 | node = prio_tree_insert(&object_tree_root, &object->tree_node); | ||
518 | /* | ||
519 | * The code calling the kernel does not yet have the pointer to the | ||
520 | * memory block to be able to free it. However, we still hold the | ||
521 | * kmemleak_lock here in case parts of the kernel started freeing | ||
522 | * random memory blocks. | ||
523 | */ | ||
524 | if (node != &object->tree_node) { | ||
525 | unsigned long flags; | ||
526 | |||
527 | kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object " | ||
528 | "search tree (already existing)\n", ptr); | ||
529 | object = lookup_object(ptr, 1); | ||
530 | spin_lock_irqsave(&object->lock, flags); | ||
531 | dump_object_info(object); | ||
532 | spin_unlock_irqrestore(&object->lock, flags); | ||
533 | |||
534 | goto out; | ||
535 | } | ||
536 | list_add_tail_rcu(&object->object_list, &object_list); | ||
537 | out: | ||
538 | write_unlock_irqrestore(&kmemleak_lock, flags); | ||
539 | } | ||
540 | |||
541 | /* | ||
542 | * Remove the metadata (struct kmemleak_object) for a memory block from the | ||
543 | * object_list and object_tree_root and decrement its use_count. | ||
544 | */ | ||
545 | static void delete_object(unsigned long ptr) | ||
546 | { | ||
547 | unsigned long flags; | ||
548 | struct kmemleak_object *object; | ||
549 | |||
550 | write_lock_irqsave(&kmemleak_lock, flags); | ||
551 | object = lookup_object(ptr, 0); | ||
552 | if (!object) { | ||
553 | kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n", | ||
554 | ptr); | ||
555 | write_unlock_irqrestore(&kmemleak_lock, flags); | ||
556 | return; | ||
557 | } | ||
558 | prio_tree_remove(&object_tree_root, &object->tree_node); | ||
559 | list_del_rcu(&object->object_list); | ||
560 | write_unlock_irqrestore(&kmemleak_lock, flags); | ||
561 | |||
562 | WARN_ON(!(object->flags & OBJECT_ALLOCATED)); | ||
563 | WARN_ON(atomic_read(&object->use_count) < 1); | ||
564 | |||
565 | /* | ||
566 | * Locking here also ensures that the corresponding memory block | ||
567 | * cannot be freed when it is being scanned. | ||
568 | */ | ||
569 | spin_lock_irqsave(&object->lock, flags); | ||
570 | if (object->flags & OBJECT_REPORTED) | ||
571 | print_referenced(object); | ||
572 | object->flags &= ~OBJECT_ALLOCATED; | ||
573 | spin_unlock_irqrestore(&object->lock, flags); | ||
574 | put_object(object); | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * Make a object permanently as gray-colored so that it can no longer be | ||
579 | * reported as a leak. This is used in general to mark a false positive. | ||
580 | */ | ||
581 | static void make_gray_object(unsigned long ptr) | ||
582 | { | ||
583 | unsigned long flags; | ||
584 | struct kmemleak_object *object; | ||
585 | |||
586 | object = find_and_get_object(ptr, 0); | ||
587 | if (!object) { | ||
588 | kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n", | ||
589 | ptr); | ||
590 | return; | ||
591 | } | ||
592 | |||
593 | spin_lock_irqsave(&object->lock, flags); | ||
594 | object->min_count = 0; | ||
595 | spin_unlock_irqrestore(&object->lock, flags); | ||
596 | put_object(object); | ||
597 | } | ||
598 | |||
599 | /* | ||
600 | * Mark the object as black-colored so that it is ignored from scans and | ||
601 | * reporting. | ||
602 | */ | ||
603 | static void make_black_object(unsigned long ptr) | ||
604 | { | ||
605 | unsigned long flags; | ||
606 | struct kmemleak_object *object; | ||
607 | |||
608 | object = find_and_get_object(ptr, 0); | ||
609 | if (!object) { | ||
610 | kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n", | ||
611 | ptr); | ||
612 | return; | ||
613 | } | ||
614 | |||
615 | spin_lock_irqsave(&object->lock, flags); | ||
616 | object->min_count = -1; | ||
617 | spin_unlock_irqrestore(&object->lock, flags); | ||
618 | put_object(object); | ||
619 | } | ||
620 | |||
621 | /* | ||
622 | * Add a scanning area to the object. If at least one such area is added, | ||
623 | * kmemleak will only scan these ranges rather than the whole memory block. | ||
624 | */ | ||
625 | static void add_scan_area(unsigned long ptr, unsigned long offset, | ||
626 | size_t length, gfp_t gfp) | ||
627 | { | ||
628 | unsigned long flags; | ||
629 | struct kmemleak_object *object; | ||
630 | struct kmemleak_scan_area *area; | ||
631 | |||
632 | object = find_and_get_object(ptr, 0); | ||
633 | if (!object) { | ||
634 | kmemleak_warn("kmemleak: Adding scan area to unknown " | ||
635 | "object at 0x%08lx\n", ptr); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK); | ||
640 | if (!area) { | ||
641 | kmemleak_warn("kmemleak: Cannot allocate a scan area\n"); | ||
642 | goto out; | ||
643 | } | ||
644 | |||
645 | spin_lock_irqsave(&object->lock, flags); | ||
646 | if (offset + length > object->size) { | ||
647 | kmemleak_warn("kmemleak: Scan area larger than object " | ||
648 | "0x%08lx\n", ptr); | ||
649 | dump_object_info(object); | ||
650 | kmem_cache_free(scan_area_cache, area); | ||
651 | goto out_unlock; | ||
652 | } | ||
653 | |||
654 | INIT_HLIST_NODE(&area->node); | ||
655 | area->offset = offset; | ||
656 | area->length = length; | ||
657 | |||
658 | hlist_add_head(&area->node, &object->area_list); | ||
659 | out_unlock: | ||
660 | spin_unlock_irqrestore(&object->lock, flags); | ||
661 | out: | ||
662 | put_object(object); | ||
663 | } | ||
664 | |||
665 | /* | ||
666 | * Set the OBJECT_NO_SCAN flag for the object corresponding to the give | ||
667 | * pointer. Such object will not be scanned by kmemleak but references to it | ||
668 | * are searched. | ||
669 | */ | ||
670 | static void object_no_scan(unsigned long ptr) | ||
671 | { | ||
672 | unsigned long flags; | ||
673 | struct kmemleak_object *object; | ||
674 | |||
675 | object = find_and_get_object(ptr, 0); | ||
676 | if (!object) { | ||
677 | kmemleak_warn("kmemleak: Not scanning unknown object at " | ||
678 | "0x%08lx\n", ptr); | ||
679 | return; | ||
680 | } | ||
681 | |||
682 | spin_lock_irqsave(&object->lock, flags); | ||
683 | object->flags |= OBJECT_NO_SCAN; | ||
684 | spin_unlock_irqrestore(&object->lock, flags); | ||
685 | put_object(object); | ||
686 | } | ||
687 | |||
688 | /* | ||
689 | * Log an early kmemleak_* call to the early_log buffer. These calls will be | ||
690 | * processed later once kmemleak is fully initialized. | ||
691 | */ | ||
692 | static void log_early(int op_type, const void *ptr, size_t size, | ||
693 | int min_count, unsigned long offset, size_t length) | ||
694 | { | ||
695 | unsigned long flags; | ||
696 | struct early_log *log; | ||
697 | |||
698 | if (crt_early_log >= ARRAY_SIZE(early_log)) { | ||
699 | kmemleak_panic("kmemleak: Early log buffer exceeded\n"); | ||
700 | return; | ||
701 | } | ||
702 | |||
703 | /* | ||
704 | * There is no need for locking since the kernel is still in UP mode | ||
705 | * at this stage. Disabling the IRQs is enough. | ||
706 | */ | ||
707 | local_irq_save(flags); | ||
708 | log = &early_log[crt_early_log]; | ||
709 | log->op_type = op_type; | ||
710 | log->ptr = ptr; | ||
711 | log->size = size; | ||
712 | log->min_count = min_count; | ||
713 | log->offset = offset; | ||
714 | log->length = length; | ||
715 | crt_early_log++; | ||
716 | local_irq_restore(flags); | ||
717 | } | ||
718 | |||
719 | /* | ||
720 | * Memory allocation function callback. This function is called from the | ||
721 | * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, | ||
722 | * vmalloc etc.). | ||
723 | */ | ||
724 | void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) | ||
725 | { | ||
726 | pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); | ||
727 | |||
728 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
729 | create_object((unsigned long)ptr, size, min_count, gfp); | ||
730 | else if (atomic_read(&kmemleak_early_log)) | ||
731 | log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0); | ||
732 | } | ||
733 | EXPORT_SYMBOL_GPL(kmemleak_alloc); | ||
734 | |||
735 | /* | ||
736 | * Memory freeing function callback. This function is called from the kernel | ||
737 | * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). | ||
738 | */ | ||
739 | void kmemleak_free(const void *ptr) | ||
740 | { | ||
741 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
742 | |||
743 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
744 | delete_object((unsigned long)ptr); | ||
745 | else if (atomic_read(&kmemleak_early_log)) | ||
746 | log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); | ||
747 | } | ||
748 | EXPORT_SYMBOL_GPL(kmemleak_free); | ||
749 | |||
750 | /* | ||
751 | * Mark an already allocated memory block as a false positive. This will cause | ||
752 | * the block to no longer be reported as leak and always be scanned. | ||
753 | */ | ||
754 | void kmemleak_not_leak(const void *ptr) | ||
755 | { | ||
756 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
757 | |||
758 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
759 | make_gray_object((unsigned long)ptr); | ||
760 | else if (atomic_read(&kmemleak_early_log)) | ||
761 | log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0); | ||
762 | } | ||
763 | EXPORT_SYMBOL(kmemleak_not_leak); | ||
764 | |||
765 | /* | ||
766 | * Ignore a memory block. This is usually done when it is known that the | ||
767 | * corresponding block is not a leak and does not contain any references to | ||
768 | * other allocated memory blocks. | ||
769 | */ | ||
770 | void kmemleak_ignore(const void *ptr) | ||
771 | { | ||
772 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
773 | |||
774 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
775 | make_black_object((unsigned long)ptr); | ||
776 | else if (atomic_read(&kmemleak_early_log)) | ||
777 | log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0); | ||
778 | } | ||
779 | EXPORT_SYMBOL(kmemleak_ignore); | ||
780 | |||
781 | /* | ||
782 | * Limit the range to be scanned in an allocated memory block. | ||
783 | */ | ||
784 | void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, | ||
785 | gfp_t gfp) | ||
786 | { | ||
787 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
788 | |||
789 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
790 | add_scan_area((unsigned long)ptr, offset, length, gfp); | ||
791 | else if (atomic_read(&kmemleak_early_log)) | ||
792 | log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length); | ||
793 | } | ||
794 | EXPORT_SYMBOL(kmemleak_scan_area); | ||
795 | |||
796 | /* | ||
797 | * Inform kmemleak not to scan the given memory block. | ||
798 | */ | ||
799 | void kmemleak_no_scan(const void *ptr) | ||
800 | { | ||
801 | pr_debug("%s(0x%p)\n", __func__, ptr); | ||
802 | |||
803 | if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) | ||
804 | object_no_scan((unsigned long)ptr); | ||
805 | else if (atomic_read(&kmemleak_early_log)) | ||
806 | log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0); | ||
807 | } | ||
808 | EXPORT_SYMBOL(kmemleak_no_scan); | ||
809 | |||
810 | /* | ||
811 | * Yield the CPU so that other tasks get a chance to run. The yielding is | ||
812 | * rate-limited to avoid excessive number of calls to the schedule() function | ||
813 | * during memory scanning. | ||
814 | */ | ||
815 | static void scan_yield(void) | ||
816 | { | ||
817 | might_sleep(); | ||
818 | |||
819 | if (time_is_before_eq_jiffies(next_scan_yield)) { | ||
820 | schedule(); | ||
821 | next_scan_yield = jiffies + jiffies_scan_yield; | ||
822 | } | ||
823 | } | ||
824 | |||
825 | /* | ||
826 | * Memory scanning is a long process and it needs to be interruptable. This | ||
827 | * function checks whether such interrupt condition occured. | ||
828 | */ | ||
829 | static int scan_should_stop(void) | ||
830 | { | ||
831 | if (!atomic_read(&kmemleak_enabled)) | ||
832 | return 1; | ||
833 | |||
834 | /* | ||
835 | * This function may be called from either process or kthread context, | ||
836 | * hence the need to check for both stop conditions. | ||
837 | */ | ||
838 | if (current->mm) | ||
839 | return signal_pending(current); | ||
840 | else | ||
841 | return kthread_should_stop(); | ||
842 | |||
843 | return 0; | ||
844 | } | ||
845 | |||
846 | /* | ||
847 | * Scan a memory block (exclusive range) for valid pointers and add those | ||
848 | * found to the gray list. | ||
849 | */ | ||
850 | static void scan_block(void *_start, void *_end, | ||
851 | struct kmemleak_object *scanned) | ||
852 | { | ||
853 | unsigned long *ptr; | ||
854 | unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); | ||
855 | unsigned long *end = _end - (BYTES_PER_POINTER - 1); | ||
856 | |||
857 | for (ptr = start; ptr < end; ptr++) { | ||
858 | unsigned long flags; | ||
859 | unsigned long pointer = *ptr; | ||
860 | struct kmemleak_object *object; | ||
861 | |||
862 | if (scan_should_stop()) | ||
863 | break; | ||
864 | |||
865 | /* | ||
866 | * When scanning a memory block with a corresponding | ||
867 | * kmemleak_object, the CPU yielding is handled in the calling | ||
868 | * code since it holds the object->lock to avoid the block | ||
869 | * freeing. | ||
870 | */ | ||
871 | if (!scanned) | ||
872 | scan_yield(); | ||
873 | |||
874 | object = find_and_get_object(pointer, 1); | ||
875 | if (!object) | ||
876 | continue; | ||
877 | if (object == scanned) { | ||
878 | /* self referenced, ignore */ | ||
879 | put_object(object); | ||
880 | continue; | ||
881 | } | ||
882 | |||
883 | /* | ||
884 | * Avoid the lockdep recursive warning on object->lock being | ||
885 | * previously acquired in scan_object(). These locks are | ||
886 | * enclosed by scan_mutex. | ||
887 | */ | ||
888 | spin_lock_irqsave_nested(&object->lock, flags, | ||
889 | SINGLE_DEPTH_NESTING); | ||
890 | if (!color_white(object)) { | ||
891 | /* non-orphan, ignored or new */ | ||
892 | spin_unlock_irqrestore(&object->lock, flags); | ||
893 | put_object(object); | ||
894 | continue; | ||
895 | } | ||
896 | |||
897 | /* | ||
898 | * Increase the object's reference count (number of pointers | ||
899 | * to the memory block). If this count reaches the required | ||
900 | * minimum, the object's color will become gray and it will be | ||
901 | * added to the gray_list. | ||
902 | */ | ||
903 | object->count++; | ||
904 | if (color_gray(object)) | ||
905 | list_add_tail(&object->gray_list, &gray_list); | ||
906 | else | ||
907 | put_object(object); | ||
908 | spin_unlock_irqrestore(&object->lock, flags); | ||
909 | } | ||
910 | } | ||
911 | |||
912 | /* | ||
913 | * Scan a memory block corresponding to a kmemleak_object. A condition is | ||
914 | * that object->use_count >= 1. | ||
915 | */ | ||
916 | static void scan_object(struct kmemleak_object *object) | ||
917 | { | ||
918 | struct kmemleak_scan_area *area; | ||
919 | struct hlist_node *elem; | ||
920 | unsigned long flags; | ||
921 | |||
922 | /* | ||
923 | * Once the object->lock is aquired, the corresponding memory block | ||
924 | * cannot be freed (the same lock is aquired in delete_object). | ||
925 | */ | ||
926 | spin_lock_irqsave(&object->lock, flags); | ||
927 | if (object->flags & OBJECT_NO_SCAN) | ||
928 | goto out; | ||
929 | if (!(object->flags & OBJECT_ALLOCATED)) | ||
930 | /* already freed object */ | ||
931 | goto out; | ||
932 | if (hlist_empty(&object->area_list)) | ||
933 | scan_block((void *)object->pointer, | ||
934 | (void *)(object->pointer + object->size), object); | ||
935 | else | ||
936 | hlist_for_each_entry(area, elem, &object->area_list, node) | ||
937 | scan_block((void *)(object->pointer + area->offset), | ||
938 | (void *)(object->pointer + area->offset | ||
939 | + area->length), object); | ||
940 | out: | ||
941 | spin_unlock_irqrestore(&object->lock, flags); | ||
942 | } | ||
943 | |||
944 | /* | ||
945 | * Scan data sections and all the referenced memory blocks allocated via the | ||
946 | * kernel's standard allocators. This function must be called with the | ||
947 | * scan_mutex held. | ||
948 | */ | ||
949 | static void kmemleak_scan(void) | ||
950 | { | ||
951 | unsigned long flags; | ||
952 | struct kmemleak_object *object, *tmp; | ||
953 | struct task_struct *task; | ||
954 | int i; | ||
955 | |||
956 | /* prepare the kmemleak_object's */ | ||
957 | rcu_read_lock(); | ||
958 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
959 | spin_lock_irqsave(&object->lock, flags); | ||
960 | #ifdef DEBUG | ||
961 | /* | ||
962 | * With a few exceptions there should be a maximum of | ||
963 | * 1 reference to any object at this point. | ||
964 | */ | ||
965 | if (atomic_read(&object->use_count) > 1) { | ||
966 | pr_debug("kmemleak: object->use_count = %d\n", | ||
967 | atomic_read(&object->use_count)); | ||
968 | dump_object_info(object); | ||
969 | } | ||
970 | #endif | ||
971 | /* reset the reference count (whiten the object) */ | ||
972 | object->count = 0; | ||
973 | if (color_gray(object) && get_object(object)) | ||
974 | list_add_tail(&object->gray_list, &gray_list); | ||
975 | |||
976 | spin_unlock_irqrestore(&object->lock, flags); | ||
977 | } | ||
978 | rcu_read_unlock(); | ||
979 | |||
980 | /* data/bss scanning */ | ||
981 | scan_block(_sdata, _edata, NULL); | ||
982 | scan_block(__bss_start, __bss_stop, NULL); | ||
983 | |||
984 | #ifdef CONFIG_SMP | ||
985 | /* per-cpu sections scanning */ | ||
986 | for_each_possible_cpu(i) | ||
987 | scan_block(__per_cpu_start + per_cpu_offset(i), | ||
988 | __per_cpu_end + per_cpu_offset(i), NULL); | ||
989 | #endif | ||
990 | |||
991 | /* | ||
992 | * Struct page scanning for each node. The code below is not yet safe | ||
993 | * with MEMORY_HOTPLUG. | ||
994 | */ | ||
995 | for_each_online_node(i) { | ||
996 | pg_data_t *pgdat = NODE_DATA(i); | ||
997 | unsigned long start_pfn = pgdat->node_start_pfn; | ||
998 | unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; | ||
999 | unsigned long pfn; | ||
1000 | |||
1001 | for (pfn = start_pfn; pfn < end_pfn; pfn++) { | ||
1002 | struct page *page; | ||
1003 | |||
1004 | if (!pfn_valid(pfn)) | ||
1005 | continue; | ||
1006 | page = pfn_to_page(pfn); | ||
1007 | /* only scan if page is in use */ | ||
1008 | if (page_count(page) == 0) | ||
1009 | continue; | ||
1010 | scan_block(page, page + 1, NULL); | ||
1011 | } | ||
1012 | } | ||
1013 | |||
1014 | /* | ||
1015 | * Scanning the task stacks may introduce false negatives and it is | ||
1016 | * not enabled by default. | ||
1017 | */ | ||
1018 | if (kmemleak_stack_scan) { | ||
1019 | read_lock(&tasklist_lock); | ||
1020 | for_each_process(task) | ||
1021 | scan_block(task_stack_page(task), | ||
1022 | task_stack_page(task) + THREAD_SIZE, NULL); | ||
1023 | read_unlock(&tasklist_lock); | ||
1024 | } | ||
1025 | |||
1026 | /* | ||
1027 | * Scan the objects already referenced from the sections scanned | ||
1028 | * above. More objects will be referenced and, if there are no memory | ||
1029 | * leaks, all the objects will be scanned. The list traversal is safe | ||
1030 | * for both tail additions and removals from inside the loop. The | ||
1031 | * kmemleak objects cannot be freed from outside the loop because their | ||
1032 | * use_count was increased. | ||
1033 | */ | ||
1034 | object = list_entry(gray_list.next, typeof(*object), gray_list); | ||
1035 | while (&object->gray_list != &gray_list) { | ||
1036 | scan_yield(); | ||
1037 | |||
1038 | /* may add new objects to the list */ | ||
1039 | if (!scan_should_stop()) | ||
1040 | scan_object(object); | ||
1041 | |||
1042 | tmp = list_entry(object->gray_list.next, typeof(*object), | ||
1043 | gray_list); | ||
1044 | |||
1045 | /* remove the object from the list and release it */ | ||
1046 | list_del(&object->gray_list); | ||
1047 | put_object(object); | ||
1048 | |||
1049 | object = tmp; | ||
1050 | } | ||
1051 | WARN_ON(!list_empty(&gray_list)); | ||
1052 | } | ||
1053 | |||
1054 | /* | ||
1055 | * Thread function performing automatic memory scanning. Unreferenced objects | ||
1056 | * at the end of a memory scan are reported but only the first time. | ||
1057 | */ | ||
1058 | static int kmemleak_scan_thread(void *arg) | ||
1059 | { | ||
1060 | static int first_run = 1; | ||
1061 | |||
1062 | pr_info("kmemleak: Automatic memory scanning thread started\n"); | ||
1063 | |||
1064 | /* | ||
1065 | * Wait before the first scan to allow the system to fully initialize. | ||
1066 | */ | ||
1067 | if (first_run) { | ||
1068 | first_run = 0; | ||
1069 | ssleep(SECS_FIRST_SCAN); | ||
1070 | } | ||
1071 | |||
1072 | while (!kthread_should_stop()) { | ||
1073 | struct kmemleak_object *object; | ||
1074 | signed long timeout = jiffies_scan_wait; | ||
1075 | |||
1076 | mutex_lock(&scan_mutex); | ||
1077 | |||
1078 | kmemleak_scan(); | ||
1079 | reported_leaks = 0; | ||
1080 | |||
1081 | rcu_read_lock(); | ||
1082 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1083 | unsigned long flags; | ||
1084 | |||
1085 | if (reported_leaks >= REPORTS_NR) | ||
1086 | break; | ||
1087 | spin_lock_irqsave(&object->lock, flags); | ||
1088 | if (!(object->flags & OBJECT_REPORTED) && | ||
1089 | unreferenced_object(object)) { | ||
1090 | print_unreferenced(NULL, object); | ||
1091 | object->flags |= OBJECT_REPORTED; | ||
1092 | reported_leaks++; | ||
1093 | } else if ((object->flags & OBJECT_REPORTED) && | ||
1094 | referenced_object(object)) { | ||
1095 | print_referenced(object); | ||
1096 | object->flags &= ~OBJECT_REPORTED; | ||
1097 | } | ||
1098 | spin_unlock_irqrestore(&object->lock, flags); | ||
1099 | } | ||
1100 | rcu_read_unlock(); | ||
1101 | |||
1102 | mutex_unlock(&scan_mutex); | ||
1103 | /* wait before the next scan */ | ||
1104 | while (timeout && !kthread_should_stop()) | ||
1105 | timeout = schedule_timeout_interruptible(timeout); | ||
1106 | } | ||
1107 | |||
1108 | pr_info("kmemleak: Automatic memory scanning thread ended\n"); | ||
1109 | |||
1110 | return 0; | ||
1111 | } | ||
1112 | |||
1113 | /* | ||
1114 | * Start the automatic memory scanning thread. This function must be called | ||
1115 | * with the kmemleak_mutex held. | ||
1116 | */ | ||
1117 | void start_scan_thread(void) | ||
1118 | { | ||
1119 | if (scan_thread) | ||
1120 | return; | ||
1121 | scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak"); | ||
1122 | if (IS_ERR(scan_thread)) { | ||
1123 | pr_warning("kmemleak: Failed to create the scan thread\n"); | ||
1124 | scan_thread = NULL; | ||
1125 | } | ||
1126 | } | ||
1127 | |||
1128 | /* | ||
1129 | * Stop the automatic memory scanning thread. This function must be called | ||
1130 | * with the kmemleak_mutex held. | ||
1131 | */ | ||
1132 | void stop_scan_thread(void) | ||
1133 | { | ||
1134 | if (scan_thread) { | ||
1135 | kthread_stop(scan_thread); | ||
1136 | scan_thread = NULL; | ||
1137 | } | ||
1138 | } | ||
1139 | |||
1140 | /* | ||
1141 | * Iterate over the object_list and return the first valid object at or after | ||
1142 | * the required position with its use_count incremented. The function triggers | ||
1143 | * a memory scanning when the pos argument points to the first position. | ||
1144 | */ | ||
1145 | static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) | ||
1146 | { | ||
1147 | struct kmemleak_object *object; | ||
1148 | loff_t n = *pos; | ||
1149 | |||
1150 | if (!n) { | ||
1151 | kmemleak_scan(); | ||
1152 | reported_leaks = 0; | ||
1153 | } | ||
1154 | if (reported_leaks >= REPORTS_NR) | ||
1155 | return NULL; | ||
1156 | |||
1157 | rcu_read_lock(); | ||
1158 | list_for_each_entry_rcu(object, &object_list, object_list) { | ||
1159 | if (n-- > 0) | ||
1160 | continue; | ||
1161 | if (get_object(object)) | ||
1162 | goto out; | ||
1163 | } | ||
1164 | object = NULL; | ||
1165 | out: | ||
1166 | rcu_read_unlock(); | ||
1167 | return object; | ||
1168 | } | ||
1169 | |||
1170 | /* | ||
1171 | * Return the next object in the object_list. The function decrements the | ||
1172 | * use_count of the previous object and increases that of the next one. | ||
1173 | */ | ||
1174 | static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
1175 | { | ||
1176 | struct kmemleak_object *prev_obj = v; | ||
1177 | struct kmemleak_object *next_obj = NULL; | ||
1178 | struct list_head *n = &prev_obj->object_list; | ||
1179 | |||
1180 | ++(*pos); | ||
1181 | if (reported_leaks >= REPORTS_NR) | ||
1182 | goto out; | ||
1183 | |||
1184 | rcu_read_lock(); | ||
1185 | list_for_each_continue_rcu(n, &object_list) { | ||
1186 | next_obj = list_entry(n, struct kmemleak_object, object_list); | ||
1187 | if (get_object(next_obj)) | ||
1188 | break; | ||
1189 | } | ||
1190 | rcu_read_unlock(); | ||
1191 | out: | ||
1192 | put_object(prev_obj); | ||
1193 | return next_obj; | ||
1194 | } | ||
1195 | |||
/*
 * seq_file ->stop callback. Drop the reference on the last object handed out
 * by the iterator; nothing to do when the iteration ended with NULL.
 */
static void kmemleak_seq_stop(struct seq_file *seq, void *v)
{
	if (!v)
		return;

	put_object(v);
}
1204 | |||
1205 | /* | ||
1206 | * Print the information for an unreferenced object to the seq file. | ||
1207 | */ | ||
1208 | static int kmemleak_seq_show(struct seq_file *seq, void *v) | ||
1209 | { | ||
1210 | struct kmemleak_object *object = v; | ||
1211 | unsigned long flags; | ||
1212 | |||
1213 | spin_lock_irqsave(&object->lock, flags); | ||
1214 | if (!unreferenced_object(object)) | ||
1215 | goto out; | ||
1216 | print_unreferenced(seq, object); | ||
1217 | reported_leaks++; | ||
1218 | out: | ||
1219 | spin_unlock_irqrestore(&object->lock, flags); | ||
1220 | return 0; | ||
1221 | } | ||
1222 | |||
1223 | static const struct seq_operations kmemleak_seq_ops = { | ||
1224 | .start = kmemleak_seq_start, | ||
1225 | .next = kmemleak_seq_next, | ||
1226 | .stop = kmemleak_seq_stop, | ||
1227 | .show = kmemleak_seq_show, | ||
1228 | }; | ||
1229 | |||
1230 | static int kmemleak_open(struct inode *inode, struct file *file) | ||
1231 | { | ||
1232 | int ret = 0; | ||
1233 | |||
1234 | if (!atomic_read(&kmemleak_enabled)) | ||
1235 | return -EBUSY; | ||
1236 | |||
1237 | ret = mutex_lock_interruptible(&kmemleak_mutex); | ||
1238 | if (ret < 0) | ||
1239 | goto out; | ||
1240 | if (file->f_mode & FMODE_READ) { | ||
1241 | ret = mutex_lock_interruptible(&scan_mutex); | ||
1242 | if (ret < 0) | ||
1243 | goto kmemleak_unlock; | ||
1244 | ret = seq_open(file, &kmemleak_seq_ops); | ||
1245 | if (ret < 0) | ||
1246 | goto scan_unlock; | ||
1247 | } | ||
1248 | return ret; | ||
1249 | |||
1250 | scan_unlock: | ||
1251 | mutex_unlock(&scan_mutex); | ||
1252 | kmemleak_unlock: | ||
1253 | mutex_unlock(&kmemleak_mutex); | ||
1254 | out: | ||
1255 | return ret; | ||
1256 | } | ||
1257 | |||
1258 | static int kmemleak_release(struct inode *inode, struct file *file) | ||
1259 | { | ||
1260 | int ret = 0; | ||
1261 | |||
1262 | if (file->f_mode & FMODE_READ) { | ||
1263 | seq_release(inode, file); | ||
1264 | mutex_unlock(&scan_mutex); | ||
1265 | } | ||
1266 | mutex_unlock(&kmemleak_mutex); | ||
1267 | |||
1268 | return ret; | ||
1269 | } | ||
1270 | |||
1271 | /* | ||
1272 | * File write operation to configure kmemleak at run-time. The following | ||
1273 | * commands can be written to the /sys/kernel/debug/kmemleak file: | ||
1274 | * off - disable kmemleak (irreversible) | ||
1275 | * stack=on - enable the task stacks scanning | ||
1276 | * stack=off - disable the tasks stacks scanning | ||
1277 | * scan=on - start the automatic memory scanning thread | ||
1278 | * scan=off - stop the automatic memory scanning thread | ||
1279 | * scan=... - set the automatic memory scanning period in seconds (0 to | ||
1280 | * disable it) | ||
1281 | */ | ||
1282 | static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, | ||
1283 | size_t size, loff_t *ppos) | ||
1284 | { | ||
1285 | char buf[64]; | ||
1286 | int buf_size; | ||
1287 | |||
1288 | if (!atomic_read(&kmemleak_enabled)) | ||
1289 | return -EBUSY; | ||
1290 | |||
1291 | buf_size = min(size, (sizeof(buf) - 1)); | ||
1292 | if (strncpy_from_user(buf, user_buf, buf_size) < 0) | ||
1293 | return -EFAULT; | ||
1294 | buf[buf_size] = 0; | ||
1295 | |||
1296 | if (strncmp(buf, "off", 3) == 0) | ||
1297 | kmemleak_disable(); | ||
1298 | else if (strncmp(buf, "stack=on", 8) == 0) | ||
1299 | kmemleak_stack_scan = 1; | ||
1300 | else if (strncmp(buf, "stack=off", 9) == 0) | ||
1301 | kmemleak_stack_scan = 0; | ||
1302 | else if (strncmp(buf, "scan=on", 7) == 0) | ||
1303 | start_scan_thread(); | ||
1304 | else if (strncmp(buf, "scan=off", 8) == 0) | ||
1305 | stop_scan_thread(); | ||
1306 | else if (strncmp(buf, "scan=", 5) == 0) { | ||
1307 | unsigned long secs; | ||
1308 | int err; | ||
1309 | |||
1310 | err = strict_strtoul(buf + 5, 0, &secs); | ||
1311 | if (err < 0) | ||
1312 | return err; | ||
1313 | stop_scan_thread(); | ||
1314 | if (secs) { | ||
1315 | jiffies_scan_wait = msecs_to_jiffies(secs * 1000); | ||
1316 | start_scan_thread(); | ||
1317 | } | ||
1318 | } else | ||
1319 | return -EINVAL; | ||
1320 | |||
1321 | /* ignore the rest of the buffer, only one command at a time */ | ||
1322 | *ppos += size; | ||
1323 | return size; | ||
1324 | } | ||
1325 | |||
1326 | static const struct file_operations kmemleak_fops = { | ||
1327 | .owner = THIS_MODULE, | ||
1328 | .open = kmemleak_open, | ||
1329 | .read = seq_read, | ||
1330 | .write = kmemleak_write, | ||
1331 | .llseek = seq_lseek, | ||
1332 | .release = kmemleak_release, | ||
1333 | }; | ||
1334 | |||
1335 | /* | ||
1336 | * Perform the freeing of the kmemleak internal objects after waiting for any | ||
1337 | * current memory scan to complete. | ||
1338 | */ | ||
1339 | static int kmemleak_cleanup_thread(void *arg) | ||
1340 | { | ||
1341 | struct kmemleak_object *object; | ||
1342 | |||
1343 | mutex_lock(&kmemleak_mutex); | ||
1344 | stop_scan_thread(); | ||
1345 | mutex_unlock(&kmemleak_mutex); | ||
1346 | |||
1347 | mutex_lock(&scan_mutex); | ||
1348 | rcu_read_lock(); | ||
1349 | list_for_each_entry_rcu(object, &object_list, object_list) | ||
1350 | delete_object(object->pointer); | ||
1351 | rcu_read_unlock(); | ||
1352 | mutex_unlock(&scan_mutex); | ||
1353 | |||
1354 | return 0; | ||
1355 | } | ||
1356 | |||
1357 | /* | ||
1358 | * Start the clean-up thread. | ||
1359 | */ | ||
1360 | static void kmemleak_cleanup(void) | ||
1361 | { | ||
1362 | struct task_struct *cleanup_thread; | ||
1363 | |||
1364 | cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL, | ||
1365 | "kmemleak-clean"); | ||
1366 | if (IS_ERR(cleanup_thread)) | ||
1367 | pr_warning("kmemleak: Failed to create the clean-up thread\n"); | ||
1368 | } | ||
1369 | |||
1370 | /* | ||
1371 | * Disable kmemleak. No memory allocation/freeing will be traced once this | ||
1372 | * function is called. Disabling kmemleak is an irreversible operation. | ||
1373 | */ | ||
1374 | static void kmemleak_disable(void) | ||
1375 | { | ||
1376 | /* atomically check whether it was already invoked */ | ||
1377 | if (atomic_cmpxchg(&kmemleak_error, 0, 1)) | ||
1378 | return; | ||
1379 | |||
1380 | /* stop any memory operation tracing */ | ||
1381 | atomic_set(&kmemleak_early_log, 0); | ||
1382 | atomic_set(&kmemleak_enabled, 0); | ||
1383 | |||
1384 | /* check whether it is too early for a kernel thread */ | ||
1385 | if (atomic_read(&kmemleak_initialized)) | ||
1386 | kmemleak_cleanup(); | ||
1387 | |||
1388 | pr_info("Kernel memory leak detector disabled\n"); | ||
1389 | } | ||
1390 | |||
1391 | /* | ||
1392 | * Allow boot-time kmemleak disabling (enabled by default). | ||
1393 | */ | ||
1394 | static int kmemleak_boot_config(char *str) | ||
1395 | { | ||
1396 | if (!str) | ||
1397 | return -EINVAL; | ||
1398 | if (strcmp(str, "off") == 0) | ||
1399 | kmemleak_disable(); | ||
1400 | else if (strcmp(str, "on") != 0) | ||
1401 | return -EINVAL; | ||
1402 | return 0; | ||
1403 | } | ||
1404 | early_param("kmemleak", kmemleak_boot_config); | ||
1405 | |||
1406 | /* | ||
1407 | * Kkmemleak initialization. | ||
1408 | */ | ||
1409 | void __init kmemleak_init(void) | ||
1410 | { | ||
1411 | int i; | ||
1412 | unsigned long flags; | ||
1413 | |||
1414 | jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD); | ||
1415 | jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); | ||
1416 | jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); | ||
1417 | |||
1418 | object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE); | ||
1419 | scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE); | ||
1420 | INIT_PRIO_TREE_ROOT(&object_tree_root); | ||
1421 | |||
1422 | /* the kernel is still in UP mode, so disabling the IRQs is enough */ | ||
1423 | local_irq_save(flags); | ||
1424 | if (!atomic_read(&kmemleak_error)) { | ||
1425 | atomic_set(&kmemleak_enabled, 1); | ||
1426 | atomic_set(&kmemleak_early_log, 0); | ||
1427 | } | ||
1428 | local_irq_restore(flags); | ||
1429 | |||
1430 | /* | ||
1431 | * This is the point where tracking allocations is safe. Automatic | ||
1432 | * scanning is started during the late initcall. Add the early logged | ||
1433 | * callbacks to the kmemleak infrastructure. | ||
1434 | */ | ||
1435 | for (i = 0; i < crt_early_log; i++) { | ||
1436 | struct early_log *log = &early_log[i]; | ||
1437 | |||
1438 | switch (log->op_type) { | ||
1439 | case KMEMLEAK_ALLOC: | ||
1440 | kmemleak_alloc(log->ptr, log->size, log->min_count, | ||
1441 | GFP_KERNEL); | ||
1442 | break; | ||
1443 | case KMEMLEAK_FREE: | ||
1444 | kmemleak_free(log->ptr); | ||
1445 | break; | ||
1446 | case KMEMLEAK_NOT_LEAK: | ||
1447 | kmemleak_not_leak(log->ptr); | ||
1448 | break; | ||
1449 | case KMEMLEAK_IGNORE: | ||
1450 | kmemleak_ignore(log->ptr); | ||
1451 | break; | ||
1452 | case KMEMLEAK_SCAN_AREA: | ||
1453 | kmemleak_scan_area(log->ptr, log->offset, log->length, | ||
1454 | GFP_KERNEL); | ||
1455 | break; | ||
1456 | case KMEMLEAK_NO_SCAN: | ||
1457 | kmemleak_no_scan(log->ptr); | ||
1458 | break; | ||
1459 | default: | ||
1460 | WARN_ON(1); | ||
1461 | } | ||
1462 | } | ||
1463 | } | ||
1464 | |||
1465 | /* | ||
1466 | * Late initialization function. | ||
1467 | */ | ||
1468 | static int __init kmemleak_late_init(void) | ||
1469 | { | ||
1470 | struct dentry *dentry; | ||
1471 | |||
1472 | atomic_set(&kmemleak_initialized, 1); | ||
1473 | |||
1474 | if (atomic_read(&kmemleak_error)) { | ||
1475 | /* | ||
1476 | * Some error occured and kmemleak was disabled. There is a | ||
1477 | * small chance that kmemleak_disable() was called immediately | ||
1478 | * after setting kmemleak_initialized and we may end up with | ||
1479 | * two clean-up threads but serialized by scan_mutex. | ||
1480 | */ | ||
1481 | kmemleak_cleanup(); | ||
1482 | return -ENOMEM; | ||
1483 | } | ||
1484 | |||
1485 | dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL, | ||
1486 | &kmemleak_fops); | ||
1487 | if (!dentry) | ||
1488 | pr_warning("kmemleak: Failed to create the debugfs kmemleak " | ||
1489 | "file\n"); | ||
1490 | mutex_lock(&kmemleak_mutex); | ||
1491 | start_scan_thread(); | ||
1492 | mutex_unlock(&kmemleak_mutex); | ||
1493 | |||
1494 | pr_info("Kernel memory leak detector initialized\n"); | ||
1495 | |||
1496 | return 0; | ||
1497 | } | ||
1498 | late_initcall(kmemleak_late_init); | ||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/mempolicy.h> | 28 | #include <linux/mempolicy.h> |
29 | #include <linux/rmap.h> | 29 | #include <linux/rmap.h> |
30 | #include <linux/mmu_notifier.h> | 30 | #include <linux/mmu_notifier.h> |
31 | #include <linux/perf_counter.h> | ||
31 | 32 | ||
32 | #include <asm/uaccess.h> | 33 | #include <asm/uaccess.h> |
33 | #include <asm/cacheflush.h> | 34 | #include <asm/cacheflush.h> |
@@ -1222,6 +1223,8 @@ munmap_back: | |||
1222 | if (correct_wcount) | 1223 | if (correct_wcount) |
1223 | atomic_inc(&inode->i_writecount); | 1224 | atomic_inc(&inode->i_writecount); |
1224 | out: | 1225 | out: |
1226 | perf_counter_mmap(vma); | ||
1227 | |||
1225 | mm->total_vm += len >> PAGE_SHIFT; | 1228 | mm->total_vm += len >> PAGE_SHIFT; |
1226 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); | 1229 | vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); |
1227 | if (vm_flags & VM_LOCKED) { | 1230 | if (vm_flags & VM_LOCKED) { |
@@ -2308,6 +2311,8 @@ int install_special_mapping(struct mm_struct *mm, | |||
2308 | 2311 | ||
2309 | mm->total_vm += len >> PAGE_SHIFT; | 2312 | mm->total_vm += len >> PAGE_SHIFT; |
2310 | 2313 | ||
2314 | perf_counter_mmap(vma); | ||
2315 | |||
2311 | return 0; | 2316 | return 0; |
2312 | } | 2317 | } |
2313 | 2318 | ||
diff --git a/mm/mprotect.c b/mm/mprotect.c index 258197b76fb4..d80311baeb2d 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/swapops.h> | 23 | #include <linux/swapops.h> |
24 | #include <linux/mmu_notifier.h> | 24 | #include <linux/mmu_notifier.h> |
25 | #include <linux/migrate.h> | 25 | #include <linux/migrate.h> |
26 | #include <linux/perf_counter.h> | ||
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 28 | #include <asm/pgtable.h> |
28 | #include <asm/cacheflush.h> | 29 | #include <asm/cacheflush.h> |
@@ -299,6 +300,7 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, | |||
299 | error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); | 300 | error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); |
300 | if (error) | 301 | if (error) |
301 | goto out; | 302 | goto out; |
303 | perf_counter_mmap(vma); | ||
302 | nstart = tmp; | 304 | nstart = tmp; |
303 | 305 | ||
304 | if (nstart < prev->vm_end) | 306 | if (nstart < prev->vm_end) |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 474c7e9dd51a..17d5f539a9aa 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -46,6 +46,7 @@ | |||
46 | #include <linux/page-isolation.h> | 46 | #include <linux/page-isolation.h> |
47 | #include <linux/page_cgroup.h> | 47 | #include <linux/page_cgroup.h> |
48 | #include <linux/debugobjects.h> | 48 | #include <linux/debugobjects.h> |
49 | #include <linux/kmemleak.h> | ||
49 | 50 | ||
50 | #include <asm/tlbflush.h> | 51 | #include <asm/tlbflush.h> |
51 | #include <asm/div64.h> | 52 | #include <asm/div64.h> |
@@ -4546,6 +4547,16 @@ void *__init alloc_large_system_hash(const char *tablename, | |||
4546 | if (_hash_mask) | 4547 | if (_hash_mask) |
4547 | *_hash_mask = (1 << log2qty) - 1; | 4548 | *_hash_mask = (1 << log2qty) - 1; |
4548 | 4549 | ||
4550 | /* | ||
4551 | * If hashdist is set, the table allocation is done with __vmalloc() | ||
4552 | * which invokes the kmemleak_alloc() callback. This function may also | ||
4553 | * be called before the slab and kmemleak are initialised when | ||
4554 | * kmemleak simply buffers the request to be executed later | ||
4555 | * (GFP_ATOMIC flag ignored in this case). | ||
4556 | */ | ||
4557 | if (!hashdist) | ||
4558 | kmemleak_alloc(table, size, 1, GFP_ATOMIC); | ||
4559 | |||
4549 | return table; | 4560 | return table; |
4550 | } | 4561 | } |
4551 | 4562 | ||
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 791905c991df..3dd4a909a1de 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c | |||
@@ -47,6 +47,8 @@ static int __init alloc_node_page_cgroup(int nid) | |||
47 | struct page_cgroup *base, *pc; | 47 | struct page_cgroup *base, *pc; |
48 | unsigned long table_size; | 48 | unsigned long table_size; |
49 | unsigned long start_pfn, nr_pages, index; | 49 | unsigned long start_pfn, nr_pages, index; |
50 | struct page *page; | ||
51 | unsigned int order; | ||
50 | 52 | ||
51 | start_pfn = NODE_DATA(nid)->node_start_pfn; | 53 | start_pfn = NODE_DATA(nid)->node_start_pfn; |
52 | nr_pages = NODE_DATA(nid)->node_spanned_pages; | 54 | nr_pages = NODE_DATA(nid)->node_spanned_pages; |
@@ -55,11 +57,13 @@ static int __init alloc_node_page_cgroup(int nid) | |||
55 | return 0; | 57 | return 0; |
56 | 58 | ||
57 | table_size = sizeof(struct page_cgroup) * nr_pages; | 59 | table_size = sizeof(struct page_cgroup) * nr_pages; |
58 | 60 | order = get_order(table_size); | |
59 | base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), | 61 | page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); |
60 | table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); | 62 | if (!page) |
61 | if (!base) | 63 | page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); |
64 | if (!page) | ||
62 | return -ENOMEM; | 65 | return -ENOMEM; |
66 | base = page_address(page); | ||
63 | for (index = 0; index < nr_pages; index++) { | 67 | for (index = 0; index < nr_pages; index++) { |
64 | pc = base + index; | 68 | pc = base + index; |
65 | __init_page_cgroup(pc, start_pfn + index); | 69 | __init_page_cgroup(pc, start_pfn + index); |
@@ -107,6 +107,7 @@ | |||
107 | #include <linux/string.h> | 107 | #include <linux/string.h> |
108 | #include <linux/uaccess.h> | 108 | #include <linux/uaccess.h> |
109 | #include <linux/nodemask.h> | 109 | #include <linux/nodemask.h> |
110 | #include <linux/kmemleak.h> | ||
110 | #include <linux/mempolicy.h> | 111 | #include <linux/mempolicy.h> |
111 | #include <linux/mutex.h> | 112 | #include <linux/mutex.h> |
112 | #include <linux/fault-inject.h> | 113 | #include <linux/fault-inject.h> |
@@ -178,13 +179,13 @@ | |||
178 | SLAB_STORE_USER | \ | 179 | SLAB_STORE_USER | \ |
179 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
180 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 181 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
181 | SLAB_DEBUG_OBJECTS) | 182 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) |
182 | #else | 183 | #else |
183 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ | 184 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
184 | SLAB_CACHE_DMA | \ | 185 | SLAB_CACHE_DMA | \ |
185 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 186 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
186 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 187 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
187 | SLAB_DEBUG_OBJECTS) | 188 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) |
188 | #endif | 189 | #endif |
189 | 190 | ||
190 | /* | 191 | /* |
@@ -315,7 +316,7 @@ static int drain_freelist(struct kmem_cache *cache, | |||
315 | struct kmem_list3 *l3, int tofree); | 316 | struct kmem_list3 *l3, int tofree); |
316 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, | 317 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
317 | int node); | 318 | int node); |
318 | static int enable_cpucache(struct kmem_cache *cachep); | 319 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); |
319 | static void cache_reap(struct work_struct *unused); | 320 | static void cache_reap(struct work_struct *unused); |
320 | 321 | ||
321 | /* | 322 | /* |
@@ -958,12 +959,20 @@ static void __cpuinit start_cpu_timer(int cpu) | |||
958 | } | 959 | } |
959 | 960 | ||
960 | static struct array_cache *alloc_arraycache(int node, int entries, | 961 | static struct array_cache *alloc_arraycache(int node, int entries, |
961 | int batchcount) | 962 | int batchcount, gfp_t gfp) |
962 | { | 963 | { |
963 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); | 964 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); |
964 | struct array_cache *nc = NULL; | 965 | struct array_cache *nc = NULL; |
965 | 966 | ||
966 | nc = kmalloc_node(memsize, GFP_KERNEL, node); | 967 | nc = kmalloc_node(memsize, gfp, node); |
968 | /* | ||
969 | * The array_cache structures contain pointers to free objects. | ||
970 | * However, when such objects are allocated or transferred to another | ||
971 | * cache the pointers are not cleared and they could be counted as | ||
972 | * valid references during a kmemleak scan. Therefore, kmemleak must | ||
973 | * not scan such objects. | ||
974 | */ | ||
975 | kmemleak_no_scan(nc); | ||
967 | if (nc) { | 976 | if (nc) { |
968 | nc->avail = 0; | 977 | nc->avail = 0; |
969 | nc->limit = entries; | 978 | nc->limit = entries; |
@@ -1003,7 +1012,7 @@ static int transfer_objects(struct array_cache *to, | |||
1003 | #define drain_alien_cache(cachep, alien) do { } while (0) | 1012 | #define drain_alien_cache(cachep, alien) do { } while (0) |
1004 | #define reap_alien(cachep, l3) do { } while (0) | 1013 | #define reap_alien(cachep, l3) do { } while (0) |
1005 | 1014 | ||
1006 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 1015 | static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
1007 | { | 1016 | { |
1008 | return (struct array_cache **)BAD_ALIEN_MAGIC; | 1017 | return (struct array_cache **)BAD_ALIEN_MAGIC; |
1009 | } | 1018 | } |
@@ -1034,7 +1043,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, | |||
1034 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 1043 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
1035 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | 1044 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); |
1036 | 1045 | ||
1037 | static struct array_cache **alloc_alien_cache(int node, int limit) | 1046 | static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
1038 | { | 1047 | { |
1039 | struct array_cache **ac_ptr; | 1048 | struct array_cache **ac_ptr; |
1040 | int memsize = sizeof(void *) * nr_node_ids; | 1049 | int memsize = sizeof(void *) * nr_node_ids; |
@@ -1042,14 +1051,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit) | |||
1042 | 1051 | ||
1043 | if (limit > 1) | 1052 | if (limit > 1) |
1044 | limit = 12; | 1053 | limit = 12; |
1045 | ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); | 1054 | ac_ptr = kmalloc_node(memsize, gfp, node); |
1046 | if (ac_ptr) { | 1055 | if (ac_ptr) { |
1047 | for_each_node(i) { | 1056 | for_each_node(i) { |
1048 | if (i == node || !node_online(i)) { | 1057 | if (i == node || !node_online(i)) { |
1049 | ac_ptr[i] = NULL; | 1058 | ac_ptr[i] = NULL; |
1050 | continue; | 1059 | continue; |
1051 | } | 1060 | } |
1052 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); | 1061 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); |
1053 | if (!ac_ptr[i]) { | 1062 | if (!ac_ptr[i]) { |
1054 | for (i--; i >= 0; i--) | 1063 | for (i--; i >= 0; i--) |
1055 | kfree(ac_ptr[i]); | 1064 | kfree(ac_ptr[i]); |
@@ -1282,20 +1291,20 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1282 | struct array_cache **alien = NULL; | 1291 | struct array_cache **alien = NULL; |
1283 | 1292 | ||
1284 | nc = alloc_arraycache(node, cachep->limit, | 1293 | nc = alloc_arraycache(node, cachep->limit, |
1285 | cachep->batchcount); | 1294 | cachep->batchcount, GFP_KERNEL); |
1286 | if (!nc) | 1295 | if (!nc) |
1287 | goto bad; | 1296 | goto bad; |
1288 | if (cachep->shared) { | 1297 | if (cachep->shared) { |
1289 | shared = alloc_arraycache(node, | 1298 | shared = alloc_arraycache(node, |
1290 | cachep->shared * cachep->batchcount, | 1299 | cachep->shared * cachep->batchcount, |
1291 | 0xbaadf00d); | 1300 | 0xbaadf00d, GFP_KERNEL); |
1292 | if (!shared) { | 1301 | if (!shared) { |
1293 | kfree(nc); | 1302 | kfree(nc); |
1294 | goto bad; | 1303 | goto bad; |
1295 | } | 1304 | } |
1296 | } | 1305 | } |
1297 | if (use_alien_caches) { | 1306 | if (use_alien_caches) { |
1298 | alien = alloc_alien_cache(node, cachep->limit); | 1307 | alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); |
1299 | if (!alien) { | 1308 | if (!alien) { |
1300 | kfree(shared); | 1309 | kfree(shared); |
1301 | kfree(nc); | 1310 | kfree(nc); |
@@ -1399,10 +1408,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | |||
1399 | { | 1408 | { |
1400 | struct kmem_list3 *ptr; | 1409 | struct kmem_list3 *ptr; |
1401 | 1410 | ||
1402 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); | 1411 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); |
1403 | BUG_ON(!ptr); | 1412 | BUG_ON(!ptr); |
1404 | 1413 | ||
1405 | local_irq_disable(); | ||
1406 | memcpy(ptr, list, sizeof(struct kmem_list3)); | 1414 | memcpy(ptr, list, sizeof(struct kmem_list3)); |
1407 | /* | 1415 | /* |
1408 | * Do not assume that spinlocks can be initialized via memcpy: | 1416 | * Do not assume that spinlocks can be initialized via memcpy: |
@@ -1411,7 +1419,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | |||
1411 | 1419 | ||
1412 | MAKE_ALL_LISTS(cachep, ptr, nodeid); | 1420 | MAKE_ALL_LISTS(cachep, ptr, nodeid); |
1413 | cachep->nodelists[nodeid] = ptr; | 1421 | cachep->nodelists[nodeid] = ptr; |
1414 | local_irq_enable(); | ||
1415 | } | 1422 | } |
1416 | 1423 | ||
1417 | /* | 1424 | /* |
@@ -1575,9 +1582,8 @@ void __init kmem_cache_init(void) | |||
1575 | { | 1582 | { |
1576 | struct array_cache *ptr; | 1583 | struct array_cache *ptr; |
1577 | 1584 | ||
1578 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1585 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1579 | 1586 | ||
1580 | local_irq_disable(); | ||
1581 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); | 1587 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); |
1582 | memcpy(ptr, cpu_cache_get(&cache_cache), | 1588 | memcpy(ptr, cpu_cache_get(&cache_cache), |
1583 | sizeof(struct arraycache_init)); | 1589 | sizeof(struct arraycache_init)); |
@@ -1587,11 +1593,9 @@ void __init kmem_cache_init(void) | |||
1587 | spin_lock_init(&ptr->lock); | 1593 | spin_lock_init(&ptr->lock); |
1588 | 1594 | ||
1589 | cache_cache.array[smp_processor_id()] = ptr; | 1595 | cache_cache.array[smp_processor_id()] = ptr; |
1590 | local_irq_enable(); | ||
1591 | 1596 | ||
1592 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1597 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1593 | 1598 | ||
1594 | local_irq_disable(); | ||
1595 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) | 1599 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) |
1596 | != &initarray_generic.cache); | 1600 | != &initarray_generic.cache); |
1597 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), | 1601 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), |
@@ -1603,7 +1607,6 @@ void __init kmem_cache_init(void) | |||
1603 | 1607 | ||
1604 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = | 1608 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = |
1605 | ptr; | 1609 | ptr; |
1606 | local_irq_enable(); | ||
1607 | } | 1610 | } |
1608 | /* 5) Replace the bootstrap kmem_list3's */ | 1611 | /* 5) Replace the bootstrap kmem_list3's */ |
1609 | { | 1612 | { |
@@ -1627,7 +1630,7 @@ void __init kmem_cache_init(void) | |||
1627 | struct kmem_cache *cachep; | 1630 | struct kmem_cache *cachep; |
1628 | mutex_lock(&cache_chain_mutex); | 1631 | mutex_lock(&cache_chain_mutex); |
1629 | list_for_each_entry(cachep, &cache_chain, next) | 1632 | list_for_each_entry(cachep, &cache_chain, next) |
1630 | if (enable_cpucache(cachep)) | 1633 | if (enable_cpucache(cachep, GFP_NOWAIT)) |
1631 | BUG(); | 1634 | BUG(); |
1632 | mutex_unlock(&cache_chain_mutex); | 1635 | mutex_unlock(&cache_chain_mutex); |
1633 | } | 1636 | } |
@@ -2064,10 +2067,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2064 | return left_over; | 2067 | return left_over; |
2065 | } | 2068 | } |
2066 | 2069 | ||
2067 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | 2070 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) |
2068 | { | 2071 | { |
2069 | if (g_cpucache_up == FULL) | 2072 | if (g_cpucache_up == FULL) |
2070 | return enable_cpucache(cachep); | 2073 | return enable_cpucache(cachep, gfp); |
2071 | 2074 | ||
2072 | if (g_cpucache_up == NONE) { | 2075 | if (g_cpucache_up == NONE) { |
2073 | /* | 2076 | /* |
@@ -2089,7 +2092,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | |||
2089 | g_cpucache_up = PARTIAL_AC; | 2092 | g_cpucache_up = PARTIAL_AC; |
2090 | } else { | 2093 | } else { |
2091 | cachep->array[smp_processor_id()] = | 2094 | cachep->array[smp_processor_id()] = |
2092 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 2095 | kmalloc(sizeof(struct arraycache_init), gfp); |
2093 | 2096 | ||
2094 | if (g_cpucache_up == PARTIAL_AC) { | 2097 | if (g_cpucache_up == PARTIAL_AC) { |
2095 | set_up_list3s(cachep, SIZE_L3); | 2098 | set_up_list3s(cachep, SIZE_L3); |
@@ -2153,6 +2156,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2153 | { | 2156 | { |
2154 | size_t left_over, slab_size, ralign; | 2157 | size_t left_over, slab_size, ralign; |
2155 | struct kmem_cache *cachep = NULL, *pc; | 2158 | struct kmem_cache *cachep = NULL, *pc; |
2159 | gfp_t gfp; | ||
2156 | 2160 | ||
2157 | /* | 2161 | /* |
2158 | * Sanity checks... these are all serious usage bugs. | 2162 | * Sanity checks... these are all serious usage bugs. |
@@ -2168,8 +2172,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2168 | * We use cache_chain_mutex to ensure a consistent view of | 2172 | * We use cache_chain_mutex to ensure a consistent view of |
2169 | * cpu_online_mask as well. Please see cpuup_callback | 2173 | * cpu_online_mask as well. Please see cpuup_callback |
2170 | */ | 2174 | */ |
2171 | get_online_cpus(); | 2175 | if (slab_is_available()) { |
2172 | mutex_lock(&cache_chain_mutex); | 2176 | get_online_cpus(); |
2177 | mutex_lock(&cache_chain_mutex); | ||
2178 | } | ||
2173 | 2179 | ||
2174 | list_for_each_entry(pc, &cache_chain, next) { | 2180 | list_for_each_entry(pc, &cache_chain, next) { |
2175 | char tmp; | 2181 | char tmp; |
@@ -2278,8 +2284,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2278 | */ | 2284 | */ |
2279 | align = ralign; | 2285 | align = ralign; |
2280 | 2286 | ||
2287 | if (slab_is_available()) | ||
2288 | gfp = GFP_KERNEL; | ||
2289 | else | ||
2290 | gfp = GFP_NOWAIT; | ||
2291 | |||
2281 | /* Get cache's description obj. */ | 2292 | /* Get cache's description obj. */ |
2282 | cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); | 2293 | cachep = kmem_cache_zalloc(&cache_cache, gfp); |
2283 | if (!cachep) | 2294 | if (!cachep) |
2284 | goto oops; | 2295 | goto oops; |
2285 | 2296 | ||
@@ -2382,7 +2393,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2382 | cachep->ctor = ctor; | 2393 | cachep->ctor = ctor; |
2383 | cachep->name = name; | 2394 | cachep->name = name; |
2384 | 2395 | ||
2385 | if (setup_cpu_cache(cachep)) { | 2396 | if (setup_cpu_cache(cachep, gfp)) { |
2386 | __kmem_cache_destroy(cachep); | 2397 | __kmem_cache_destroy(cachep); |
2387 | cachep = NULL; | 2398 | cachep = NULL; |
2388 | goto oops; | 2399 | goto oops; |
@@ -2394,8 +2405,10 @@ oops: | |||
2394 | if (!cachep && (flags & SLAB_PANIC)) | 2405 | if (!cachep && (flags & SLAB_PANIC)) |
2395 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2406 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
2396 | name); | 2407 | name); |
2397 | mutex_unlock(&cache_chain_mutex); | 2408 | if (slab_is_available()) { |
2398 | put_online_cpus(); | 2409 | mutex_unlock(&cache_chain_mutex); |
2410 | put_online_cpus(); | ||
2411 | } | ||
2399 | return cachep; | 2412 | return cachep; |
2400 | } | 2413 | } |
2401 | EXPORT_SYMBOL(kmem_cache_create); | 2414 | EXPORT_SYMBOL(kmem_cache_create); |
@@ -2621,6 +2634,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
2621 | /* Slab management obj is off-slab. */ | 2634 | /* Slab management obj is off-slab. */ |
2622 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, | 2635 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, |
2623 | local_flags, nodeid); | 2636 | local_flags, nodeid); |
2637 | /* | ||
2638 | * If the first object in the slab is leaked (it's allocated | ||
2639 | * but no one has a reference to it), we want to make sure | ||
2640 | * kmemleak does not treat the ->s_mem pointer as a reference | ||
2641 | * to the object. Otherwise we will not report the leak. | ||
2642 | */ | ||
2643 | kmemleak_scan_area(slabp, offsetof(struct slab, list), | ||
2644 | sizeof(struct list_head), local_flags); | ||
2624 | if (!slabp) | 2645 | if (!slabp) |
2625 | return NULL; | 2646 | return NULL; |
2626 | } else { | 2647 | } else { |
@@ -3141,6 +3162,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3141 | STATS_INC_ALLOCMISS(cachep); | 3162 | STATS_INC_ALLOCMISS(cachep); |
3142 | objp = cache_alloc_refill(cachep, flags); | 3163 | objp = cache_alloc_refill(cachep, flags); |
3143 | } | 3164 | } |
3165 | /* | ||
3166 | * To avoid a false negative, if an object that is in one of the | ||
3167 | * per-CPU caches is leaked, we need to make sure kmemleak doesn't | ||
3168 | * treat the array pointers as a reference to the object. | ||
3169 | */ | ||
3170 | kmemleak_erase(&ac->entry[ac->avail]); | ||
3144 | return objp; | 3171 | return objp; |
3145 | } | 3172 | } |
3146 | 3173 | ||
@@ -3360,6 +3387,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3360 | out: | 3387 | out: |
3361 | local_irq_restore(save_flags); | 3388 | local_irq_restore(save_flags); |
3362 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3389 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
3390 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, | ||
3391 | flags); | ||
3363 | 3392 | ||
3364 | if (unlikely((flags & __GFP_ZERO) && ptr)) | 3393 | if (unlikely((flags & __GFP_ZERO) && ptr)) |
3365 | memset(ptr, 0, obj_size(cachep)); | 3394 | memset(ptr, 0, obj_size(cachep)); |
@@ -3415,6 +3444,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
3415 | objp = __do_cache_alloc(cachep, flags); | 3444 | objp = __do_cache_alloc(cachep, flags); |
3416 | local_irq_restore(save_flags); | 3445 | local_irq_restore(save_flags); |
3417 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3446 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
3447 | kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, | ||
3448 | flags); | ||
3418 | prefetchw(objp); | 3449 | prefetchw(objp); |
3419 | 3450 | ||
3420 | if (unlikely((flags & __GFP_ZERO) && objp)) | 3451 | if (unlikely((flags & __GFP_ZERO) && objp)) |
@@ -3530,6 +3561,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3530 | struct array_cache *ac = cpu_cache_get(cachep); | 3561 | struct array_cache *ac = cpu_cache_get(cachep); |
3531 | 3562 | ||
3532 | check_irq_off(); | 3563 | check_irq_off(); |
3564 | kmemleak_free_recursive(objp, cachep->flags); | ||
3533 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); | 3565 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); |
3534 | 3566 | ||
3535 | /* | 3567 | /* |
@@ -3802,7 +3834,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); | |||
3802 | /* | 3834 | /* |
3803 | * This initializes kmem_list3 or resizes various caches for all nodes. | 3835 | * This initializes kmem_list3 or resizes various caches for all nodes. |
3804 | */ | 3836 | */ |
3805 | static int alloc_kmemlist(struct kmem_cache *cachep) | 3837 | static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
3806 | { | 3838 | { |
3807 | int node; | 3839 | int node; |
3808 | struct kmem_list3 *l3; | 3840 | struct kmem_list3 *l3; |
@@ -3812,7 +3844,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3812 | for_each_online_node(node) { | 3844 | for_each_online_node(node) { |
3813 | 3845 | ||
3814 | if (use_alien_caches) { | 3846 | if (use_alien_caches) { |
3815 | new_alien = alloc_alien_cache(node, cachep->limit); | 3847 | new_alien = alloc_alien_cache(node, cachep->limit, gfp); |
3816 | if (!new_alien) | 3848 | if (!new_alien) |
3817 | goto fail; | 3849 | goto fail; |
3818 | } | 3850 | } |
@@ -3821,7 +3853,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3821 | if (cachep->shared) { | 3853 | if (cachep->shared) { |
3822 | new_shared = alloc_arraycache(node, | 3854 | new_shared = alloc_arraycache(node, |
3823 | cachep->shared*cachep->batchcount, | 3855 | cachep->shared*cachep->batchcount, |
3824 | 0xbaadf00d); | 3856 | 0xbaadf00d, gfp); |
3825 | if (!new_shared) { | 3857 | if (!new_shared) { |
3826 | free_alien_cache(new_alien); | 3858 | free_alien_cache(new_alien); |
3827 | goto fail; | 3859 | goto fail; |
@@ -3850,7 +3882,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3850 | free_alien_cache(new_alien); | 3882 | free_alien_cache(new_alien); |
3851 | continue; | 3883 | continue; |
3852 | } | 3884 | } |
3853 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); | 3885 | l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); |
3854 | if (!l3) { | 3886 | if (!l3) { |
3855 | free_alien_cache(new_alien); | 3887 | free_alien_cache(new_alien); |
3856 | kfree(new_shared); | 3888 | kfree(new_shared); |
@@ -3906,18 +3938,18 @@ static void do_ccupdate_local(void *info) | |||
3906 | 3938 | ||
3907 | /* Always called with the cache_chain_mutex held */ | 3939 | /* Always called with the cache_chain_mutex held */ |
3908 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 3940 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3909 | int batchcount, int shared) | 3941 | int batchcount, int shared, gfp_t gfp) |
3910 | { | 3942 | { |
3911 | struct ccupdate_struct *new; | 3943 | struct ccupdate_struct *new; |
3912 | int i; | 3944 | int i; |
3913 | 3945 | ||
3914 | new = kzalloc(sizeof(*new), GFP_KERNEL); | 3946 | new = kzalloc(sizeof(*new), gfp); |
3915 | if (!new) | 3947 | if (!new) |
3916 | return -ENOMEM; | 3948 | return -ENOMEM; |
3917 | 3949 | ||
3918 | for_each_online_cpu(i) { | 3950 | for_each_online_cpu(i) { |
3919 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, | 3951 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, |
3920 | batchcount); | 3952 | batchcount, gfp); |
3921 | if (!new->new[i]) { | 3953 | if (!new->new[i]) { |
3922 | for (i--; i >= 0; i--) | 3954 | for (i--; i >= 0; i--) |
3923 | kfree(new->new[i]); | 3955 | kfree(new->new[i]); |
@@ -3944,11 +3976,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3944 | kfree(ccold); | 3976 | kfree(ccold); |
3945 | } | 3977 | } |
3946 | kfree(new); | 3978 | kfree(new); |
3947 | return alloc_kmemlist(cachep); | 3979 | return alloc_kmemlist(cachep, gfp); |
3948 | } | 3980 | } |
3949 | 3981 | ||
3950 | /* Called with cache_chain_mutex held always */ | 3982 | /* Called with cache_chain_mutex held always */ |
3951 | static int enable_cpucache(struct kmem_cache *cachep) | 3983 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) |
3952 | { | 3984 | { |
3953 | int err; | 3985 | int err; |
3954 | int limit, shared; | 3986 | int limit, shared; |
@@ -3994,7 +4026,7 @@ static int enable_cpucache(struct kmem_cache *cachep) | |||
3994 | if (limit > 32) | 4026 | if (limit > 32) |
3995 | limit = 32; | 4027 | limit = 32; |
3996 | #endif | 4028 | #endif |
3997 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); | 4029 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); |
3998 | if (err) | 4030 | if (err) |
3999 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", | 4031 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", |
4000 | cachep->name, -err); | 4032 | cachep->name, -err); |
@@ -4300,7 +4332,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
4300 | res = 0; | 4332 | res = 0; |
4301 | } else { | 4333 | } else { |
4302 | res = do_tune_cpucache(cachep, limit, | 4334 | res = do_tune_cpucache(cachep, limit, |
4303 | batchcount, shared); | 4335 | batchcount, shared, |
4336 | GFP_KERNEL); | ||
4304 | } | 4337 | } |
4305 | break; | 4338 | break; |
4306 | } | 4339 | } |
@@ -67,6 +67,7 @@ | |||
67 | #include <linux/rcupdate.h> | 67 | #include <linux/rcupdate.h> |
68 | #include <linux/list.h> | 68 | #include <linux/list.h> |
69 | #include <linux/kmemtrace.h> | 69 | #include <linux/kmemtrace.h> |
70 | #include <linux/kmemleak.h> | ||
70 | #include <asm/atomic.h> | 71 | #include <asm/atomic.h> |
71 | 72 | ||
72 | /* | 73 | /* |
@@ -509,6 +510,7 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) | |||
509 | size, PAGE_SIZE << order, gfp, node); | 510 | size, PAGE_SIZE << order, gfp, node); |
510 | } | 511 | } |
511 | 512 | ||
513 | kmemleak_alloc(ret, size, 1, gfp); | ||
512 | return ret; | 514 | return ret; |
513 | } | 515 | } |
514 | EXPORT_SYMBOL(__kmalloc_node); | 516 | EXPORT_SYMBOL(__kmalloc_node); |
@@ -521,6 +523,7 @@ void kfree(const void *block) | |||
521 | 523 | ||
522 | if (unlikely(ZERO_OR_NULL_PTR(block))) | 524 | if (unlikely(ZERO_OR_NULL_PTR(block))) |
523 | return; | 525 | return; |
526 | kmemleak_free(block); | ||
524 | 527 | ||
525 | sp = slob_page(block); | 528 | sp = slob_page(block); |
526 | if (is_slob_page(sp)) { | 529 | if (is_slob_page(sp)) { |
@@ -584,12 +587,14 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
584 | } else if (flags & SLAB_PANIC) | 587 | } else if (flags & SLAB_PANIC) |
585 | panic("Cannot create slab cache %s\n", name); | 588 | panic("Cannot create slab cache %s\n", name); |
586 | 589 | ||
590 | kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL); | ||
587 | return c; | 591 | return c; |
588 | } | 592 | } |
589 | EXPORT_SYMBOL(kmem_cache_create); | 593 | EXPORT_SYMBOL(kmem_cache_create); |
590 | 594 | ||
591 | void kmem_cache_destroy(struct kmem_cache *c) | 595 | void kmem_cache_destroy(struct kmem_cache *c) |
592 | { | 596 | { |
597 | kmemleak_free(c); | ||
593 | slob_free(c, sizeof(struct kmem_cache)); | 598 | slob_free(c, sizeof(struct kmem_cache)); |
594 | } | 599 | } |
595 | EXPORT_SYMBOL(kmem_cache_destroy); | 600 | EXPORT_SYMBOL(kmem_cache_destroy); |
@@ -613,6 +618,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) | |||
613 | if (c->ctor) | 618 | if (c->ctor) |
614 | c->ctor(b); | 619 | c->ctor(b); |
615 | 620 | ||
621 | kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); | ||
616 | return b; | 622 | return b; |
617 | } | 623 | } |
618 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 624 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
@@ -635,6 +641,7 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
635 | 641 | ||
636 | void kmem_cache_free(struct kmem_cache *c, void *b) | 642 | void kmem_cache_free(struct kmem_cache *c, void *b) |
637 | { | 643 | { |
644 | kmemleak_free_recursive(b, c->flags); | ||
638 | if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { | 645 | if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { |
639 | struct slob_rcu *slob_rcu; | 646 | struct slob_rcu *slob_rcu; |
640 | slob_rcu = b + (c->size - sizeof(struct slob_rcu)); | 647 | slob_rcu = b + (c->size - sizeof(struct slob_rcu)); |
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/kmemtrace.h> | 20 | #include <linux/kmemtrace.h> |
21 | #include <linux/cpu.h> | 21 | #include <linux/cpu.h> |
22 | #include <linux/cpuset.h> | 22 | #include <linux/cpuset.h> |
23 | #include <linux/kmemleak.h> | ||
23 | #include <linux/mempolicy.h> | 24 | #include <linux/mempolicy.h> |
24 | #include <linux/ctype.h> | 25 | #include <linux/ctype.h> |
25 | #include <linux/debugobjects.h> | 26 | #include <linux/debugobjects.h> |
@@ -143,7 +144,7 @@ | |||
143 | * Set of flags that will prevent slab merging | 144 | * Set of flags that will prevent slab merging |
144 | */ | 145 | */ |
145 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | 146 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
146 | SLAB_TRACE | SLAB_DESTROY_BY_RCU) | 147 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) |
147 | 148 | ||
148 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ | 149 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ |
149 | SLAB_CACHE_DMA) | 150 | SLAB_CACHE_DMA) |
@@ -1617,6 +1618,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1617 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1618 | if (unlikely((gfpflags & __GFP_ZERO) && object)) |
1618 | memset(object, 0, objsize); | 1619 | memset(object, 0, objsize); |
1619 | 1620 | ||
1621 | kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); | ||
1620 | return object; | 1622 | return object; |
1621 | } | 1623 | } |
1622 | 1624 | ||
@@ -1746,6 +1748,7 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1746 | struct kmem_cache_cpu *c; | 1748 | struct kmem_cache_cpu *c; |
1747 | unsigned long flags; | 1749 | unsigned long flags; |
1748 | 1750 | ||
1751 | kmemleak_free_recursive(x, s->flags); | ||
1749 | local_irq_save(flags); | 1752 | local_irq_save(flags); |
1750 | c = get_cpu_slab(s, smp_processor_id()); | 1753 | c = get_cpu_slab(s, smp_processor_id()); |
1751 | debug_check_no_locks_freed(object, c->objsize); | 1754 | debug_check_no_locks_freed(object, c->objsize); |
@@ -2557,13 +2560,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, | |||
2557 | if (gfp_flags & SLUB_DMA) | 2560 | if (gfp_flags & SLUB_DMA) |
2558 | flags = SLAB_CACHE_DMA; | 2561 | flags = SLAB_CACHE_DMA; |
2559 | 2562 | ||
2560 | down_write(&slub_lock); | 2563 | /* |
2564 | * This function is called with IRQs disabled during early-boot on | ||
2565 | * single CPU so there's no need to take slub_lock here. | ||
2566 | */ | ||
2561 | if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, | 2567 | if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, |
2562 | flags, NULL)) | 2568 | flags, NULL)) |
2563 | goto panic; | 2569 | goto panic; |
2564 | 2570 | ||
2565 | list_add(&s->list, &slab_caches); | 2571 | list_add(&s->list, &slab_caches); |
2566 | up_write(&slub_lock); | 2572 | |
2567 | if (sysfs_slab_add(s)) | 2573 | if (sysfs_slab_add(s)) |
2568 | goto panic; | 2574 | goto panic; |
2569 | return s; | 2575 | return s; |
@@ -3021,7 +3027,7 @@ void __init kmem_cache_init(void) | |||
3021 | * kmem_cache_open for slab_state == DOWN. | 3027 | * kmem_cache_open for slab_state == DOWN. |
3022 | */ | 3028 | */ |
3023 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", | 3029 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", |
3024 | sizeof(struct kmem_cache_node), GFP_KERNEL); | 3030 | sizeof(struct kmem_cache_node), GFP_NOWAIT); |
3025 | kmalloc_caches[0].refcount = -1; | 3031 | kmalloc_caches[0].refcount = -1; |
3026 | caches++; | 3032 | caches++; |
3027 | 3033 | ||
@@ -3034,16 +3040,16 @@ void __init kmem_cache_init(void) | |||
3034 | /* Caches that are not of the two-to-the-power-of size */ | 3040 | /* Caches that are not of the two-to-the-power-of size */ |
3035 | if (KMALLOC_MIN_SIZE <= 64) { | 3041 | if (KMALLOC_MIN_SIZE <= 64) { |
3036 | create_kmalloc_cache(&kmalloc_caches[1], | 3042 | create_kmalloc_cache(&kmalloc_caches[1], |
3037 | "kmalloc-96", 96, GFP_KERNEL); | 3043 | "kmalloc-96", 96, GFP_NOWAIT); |
3038 | caches++; | 3044 | caches++; |
3039 | create_kmalloc_cache(&kmalloc_caches[2], | 3045 | create_kmalloc_cache(&kmalloc_caches[2], |
3040 | "kmalloc-192", 192, GFP_KERNEL); | 3046 | "kmalloc-192", 192, GFP_NOWAIT); |
3041 | caches++; | 3047 | caches++; |
3042 | } | 3048 | } |
3043 | 3049 | ||
3044 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | 3050 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { |
3045 | create_kmalloc_cache(&kmalloc_caches[i], | 3051 | create_kmalloc_cache(&kmalloc_caches[i], |
3046 | "kmalloc", 1 << i, GFP_KERNEL); | 3052 | "kmalloc", 1 << i, GFP_NOWAIT); |
3047 | caches++; | 3053 | caches++; |
3048 | } | 3054 | } |
3049 | 3055 | ||
@@ -3080,7 +3086,7 @@ void __init kmem_cache_init(void) | |||
3080 | /* Provide the correct kmalloc names now that the caches are up */ | 3086 | /* Provide the correct kmalloc names now that the caches are up */ |
3081 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) | 3087 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) |
3082 | kmalloc_caches[i]. name = | 3088 | kmalloc_caches[i]. name = |
3083 | kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); | 3089 | kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); |
3084 | 3090 | ||
3085 | #ifdef CONFIG_SMP | 3091 | #ifdef CONFIG_SMP |
3086 | register_cpu_notifier(&slab_notifier); | 3092 | register_cpu_notifier(&slab_notifier); |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 083716ea38c9..f8189a4b3e13 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -23,8 +23,8 @@ | |||
23 | #include <linux/rbtree.h> | 23 | #include <linux/rbtree.h> |
24 | #include <linux/radix-tree.h> | 24 | #include <linux/radix-tree.h> |
25 | #include <linux/rcupdate.h> | 25 | #include <linux/rcupdate.h> |
26 | #include <linux/bootmem.h> | ||
27 | #include <linux/pfn.h> | 26 | #include <linux/pfn.h> |
27 | #include <linux/kmemleak.h> | ||
28 | 28 | ||
29 | #include <asm/atomic.h> | 29 | #include <asm/atomic.h> |
30 | #include <asm/uaccess.h> | 30 | #include <asm/uaccess.h> |
@@ -1032,7 +1032,7 @@ void __init vmalloc_init(void) | |||
1032 | 1032 | ||
1033 | /* Import existing vmlist entries. */ | 1033 | /* Import existing vmlist entries. */ |
1034 | for (tmp = vmlist; tmp; tmp = tmp->next) { | 1034 | for (tmp = vmlist; tmp; tmp = tmp->next) { |
1035 | va = alloc_bootmem(sizeof(struct vmap_area)); | 1035 | va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); |
1036 | va->flags = tmp->flags | VM_VM_AREA; | 1036 | va->flags = tmp->flags | VM_VM_AREA; |
1037 | va->va_start = (unsigned long)tmp->addr; | 1037 | va->va_start = (unsigned long)tmp->addr; |
1038 | va->va_end = va->va_start + tmp->size; | 1038 | va->va_end = va->va_start + tmp->size; |
@@ -1327,6 +1327,9 @@ static void __vunmap(const void *addr, int deallocate_pages) | |||
1327 | void vfree(const void *addr) | 1327 | void vfree(const void *addr) |
1328 | { | 1328 | { |
1329 | BUG_ON(in_interrupt()); | 1329 | BUG_ON(in_interrupt()); |
1330 | |||
1331 | kmemleak_free(addr); | ||
1332 | |||
1330 | __vunmap(addr, 1); | 1333 | __vunmap(addr, 1); |
1331 | } | 1334 | } |
1332 | EXPORT_SYMBOL(vfree); | 1335 | EXPORT_SYMBOL(vfree); |
@@ -1439,8 +1442,17 @@ fail: | |||
1439 | 1442 | ||
1440 | void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) | 1443 | void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) |
1441 | { | 1444 | { |
1442 | return __vmalloc_area_node(area, gfp_mask, prot, -1, | 1445 | void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1, |
1443 | __builtin_return_address(0)); | 1446 | __builtin_return_address(0)); |
1447 | |||
1448 | /* | ||
1449 | * A ref_count = 3 is needed because the vm_struct and vmap_area | ||
1450 | * structures allocated in the __get_vm_area_node() function contain | ||
1451 | * references to the virtual address of the vmalloc'ed block. | ||
1452 | */ | ||
1453 | kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask); | ||
1454 | |||
1455 | return addr; | ||
1444 | } | 1456 | } |
1445 | 1457 | ||
1446 | /** | 1458 | /** |
@@ -1459,6 +1471,8 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
1459 | int node, void *caller) | 1471 | int node, void *caller) |
1460 | { | 1472 | { |
1461 | struct vm_struct *area; | 1473 | struct vm_struct *area; |
1474 | void *addr; | ||
1475 | unsigned long real_size = size; | ||
1462 | 1476 | ||
1463 | size = PAGE_ALIGN(size); | 1477 | size = PAGE_ALIGN(size); |
1464 | if (!size || (size >> PAGE_SHIFT) > num_physpages) | 1478 | if (!size || (size >> PAGE_SHIFT) > num_physpages) |
@@ -1470,7 +1484,16 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, | |||
1470 | if (!area) | 1484 | if (!area) |
1471 | return NULL; | 1485 | return NULL; |
1472 | 1486 | ||
1473 | return __vmalloc_area_node(area, gfp_mask, prot, node, caller); | 1487 | addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); |
1488 | |||
1489 | /* | ||
1490 | * A ref_count = 3 is needed because the vm_struct and vmap_area | ||
1491 | * structures allocated in the __get_vm_area_node() function contain | ||
1492 | * references to the virtual address of the vmalloc'ed block. | ||
1493 | */ | ||
1494 | kmemleak_alloc(addr, real_size, 3, gfp_mask); | ||
1495 | |||
1496 | return addr; | ||
1474 | } | 1497 | } |
1475 | 1498 | ||
1476 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) | 1499 | void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) |
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore new file mode 100644 index 000000000000..d69a759a1046 --- /dev/null +++ b/tools/perf/.gitignore | |||
@@ -0,0 +1,16 @@ | |||
1 | PERF-BUILD-OPTIONS | ||
2 | PERF-CFLAGS | ||
3 | PERF-GUI-VARS | ||
4 | PERF-VERSION-FILE | ||
5 | perf | ||
6 | perf-help | ||
7 | perf-record | ||
8 | perf-report | ||
9 | perf-stat | ||
10 | perf-top | ||
11 | perf*.1 | ||
12 | perf*.xml | ||
13 | common-cmds.h | ||
14 | tags | ||
15 | TAGS | ||
16 | cscope* | ||
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile new file mode 100644 index 000000000000..5457192e1b41 --- /dev/null +++ b/tools/perf/Documentation/Makefile | |||
@@ -0,0 +1,300 @@ | |||
1 | MAN1_TXT= \ | ||
2 | $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ | ||
3 | $(wildcard perf-*.txt)) \ | ||
4 | perf.txt | ||
5 | MAN5_TXT= | ||
6 | MAN7_TXT= | ||
7 | |||
8 | MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) | ||
9 | MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) | ||
10 | MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) | ||
11 | |||
12 | DOC_HTML=$(MAN_HTML) | ||
13 | |||
14 | ARTICLES = | ||
15 | # with their own formatting rules. | ||
16 | SP_ARTICLES = | ||
17 | API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) | ||
18 | SP_ARTICLES += $(API_DOCS) | ||
19 | SP_ARTICLES += technical/api-index | ||
20 | |||
21 | DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) | ||
22 | |||
23 | DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) | ||
24 | DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) | ||
25 | DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) | ||
26 | |||
27 | prefix?=$(HOME) | ||
28 | bindir?=$(prefix)/bin | ||
29 | htmldir?=$(prefix)/share/doc/perf-doc | ||
30 | pdfdir?=$(prefix)/share/doc/perf-doc | ||
31 | mandir?=$(prefix)/share/man | ||
32 | man1dir=$(mandir)/man1 | ||
33 | man5dir=$(mandir)/man5 | ||
34 | man7dir=$(mandir)/man7 | ||
35 | # DESTDIR= | ||
36 | |||
37 | ASCIIDOC=asciidoc | ||
38 | ASCIIDOC_EXTRA = | ||
39 | MANPAGE_XSL = manpage-normal.xsl | ||
40 | XMLTO_EXTRA = | ||
41 | INSTALL?=install | ||
42 | RM ?= rm -f | ||
43 | DOC_REF = origin/man | ||
44 | HTML_REF = origin/html | ||
45 | |||
46 | infodir?=$(prefix)/share/info | ||
47 | MAKEINFO=makeinfo | ||
48 | INSTALL_INFO=install-info | ||
49 | DOCBOOK2X_TEXI=docbook2x-texi | ||
50 | DBLATEX=dblatex | ||
51 | ifndef PERL_PATH | ||
52 | PERL_PATH = /usr/bin/perl | ||
53 | endif | ||
54 | |||
55 | -include ../config.mak.autogen | ||
56 | -include ../config.mak | ||
57 | |||
58 | # | ||
59 | # For asciidoc ... | ||
60 | # -7.1.2, no extra settings are needed. | ||
61 | # 8.0-, set ASCIIDOC8. | ||
62 | # | ||
63 | |||
64 | # | ||
65 | # For docbook-xsl ... | ||
66 | # -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) | ||
67 | # 1.69.0, no extra settings are needed? | ||
68 | # 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? | ||
69 | # 1.71.1, no extra settings are needed? | ||
70 | # 1.72.0, set DOCBOOK_XSL_172. | ||
71 | # 1.73.0-, set ASCIIDOC_NO_ROFF | ||
72 | # | ||
73 | |||
74 | # | ||
75 | # If you had been using DOCBOOK_XSL_172 in an attempt to get rid | ||
76 | # of 'the ".ft C" problem' in your generated manpages, and you | ||
77 | # instead ended up with weird characters around callouts, try | ||
78 | # using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). | ||
79 | # | ||
80 | |||
81 | ifdef ASCIIDOC8 | ||
82 | ASCIIDOC_EXTRA += -a asciidoc7compatible | ||
83 | endif | ||
84 | ifdef DOCBOOK_XSL_172 | ||
85 | ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff | ||
86 | MANPAGE_XSL = manpage-1.72.xsl | ||
87 | else | ||
88 | ifdef ASCIIDOC_NO_ROFF | ||
89 | # docbook-xsl after 1.72 needs the regular XSL, but will not | ||
90 | # pass-thru raw roff codes from asciidoc.conf, so turn them off. | ||
91 | ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff | ||
92 | endif | ||
93 | endif | ||
94 | ifdef MAN_BOLD_LITERAL | ||
95 | XMLTO_EXTRA += -m manpage-bold-literal.xsl | ||
96 | endif | ||
97 | ifdef DOCBOOK_SUPPRESS_SP | ||
98 | XMLTO_EXTRA += -m manpage-suppress-sp.xsl | ||
99 | endif | ||
100 | |||
101 | SHELL_PATH ?= $(SHELL) | ||
102 | # Shell quote; | ||
103 | SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) | ||
104 | |||
105 | # | ||
106 | # Please note that there is a minor bug in asciidoc. | ||
107 | # The version after 6.0.3 _will_ include the patch found here: | ||
108 | # http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2 | ||
109 | # | ||
110 | # Until that version is released you may have to apply the patch | ||
111 | # yourself - yes, all 6 characters of it! | ||
112 | # | ||
113 | |||
114 | QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir | ||
115 | QUIET_SUBDIR1 = | ||
116 | |||
117 | ifneq ($(findstring $(MAKEFLAGS),w),w) | ||
118 | PRINT_DIR = --no-print-directory | ||
119 | else # "make -w" | ||
120 | NO_SUBDIR = : | ||
121 | endif | ||
122 | |||
123 | ifneq ($(findstring $(MAKEFLAGS),s),s) | ||
124 | ifndef V | ||
125 | QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; | ||
126 | QUIET_XMLTO = @echo ' ' XMLTO $@; | ||
127 | QUIET_DB2TEXI = @echo ' ' DB2TEXI $@; | ||
128 | QUIET_MAKEINFO = @echo ' ' MAKEINFO $@; | ||
129 | QUIET_DBLATEX = @echo ' ' DBLATEX $@; | ||
130 | QUIET_XSLTPROC = @echo ' ' XSLTPROC $@; | ||
131 | QUIET_GEN = @echo ' ' GEN $@; | ||
132 | QUIET_STDERR = 2> /dev/null | ||
133 | QUIET_SUBDIR0 = +@subdir= | ||
134 | QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ | ||
135 | $(MAKE) $(PRINT_DIR) -C $$subdir | ||
136 | export V | ||
137 | endif | ||
138 | endif | ||
139 | |||
140 | all: html man | ||
141 | |||
142 | html: $(DOC_HTML) | ||
143 | |||
144 | $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf | ||
145 | |||
146 | man: man1 man5 man7 | ||
147 | man1: $(DOC_MAN1) | ||
148 | man5: $(DOC_MAN5) | ||
149 | man7: $(DOC_MAN7) | ||
150 | |||
151 | info: perf.info perfman.info | ||
152 | |||
153 | pdf: user-manual.pdf | ||
154 | |||
155 | install: install-man | ||
156 | |||
157 | install-man: man | ||
158 | $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir) | ||
159 | # $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir) | ||
160 | # $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) | ||
161 | $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir) | ||
162 | # $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir) | ||
163 | # $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) | ||
164 | |||
165 | install-info: info | ||
166 | $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) | ||
167 | $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) | ||
168 | if test -r $(DESTDIR)$(infodir)/dir; then \ | ||
169 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ | ||
170 | $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ | ||
171 | else \ | ||
172 | echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ | ||
173 | fi | ||
174 | |||
175 | install-pdf: pdf | ||
176 | $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) | ||
177 | $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) | ||
178 | |||
179 | install-html: html | ||
180 | '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) | ||
181 | |||
182 | ../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | ||
183 | $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE | ||
184 | |||
185 | -include ../PERF-VERSION-FILE | ||
186 | |||
187 | # | ||
188 | # Determine "include::" file references in asciidoc files. | ||
189 | # | ||
190 | doc.dep : $(wildcard *.txt) build-docdep.perl | ||
191 | $(QUIET_GEN)$(RM) $@+ $@ && \ | ||
192 | $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ | ||
193 | mv $@+ $@ | ||
194 | |||
195 | -include doc.dep | ||
196 | |||
197 | cmds_txt = cmds-ancillaryinterrogators.txt \ | ||
198 | cmds-ancillarymanipulators.txt \ | ||
199 | cmds-mainporcelain.txt \ | ||
200 | cmds-plumbinginterrogators.txt \ | ||
201 | cmds-plumbingmanipulators.txt \ | ||
202 | cmds-synchingrepositories.txt \ | ||
203 | cmds-synchelpers.txt \ | ||
204 | cmds-purehelpers.txt \ | ||
205 | cmds-foreignscminterface.txt | ||
206 | |||
207 | $(cmds_txt): cmd-list.made | ||
208 | |||
209 | cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) | ||
210 | $(QUIET_GEN)$(RM) $@ && \ | ||
211 | $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ | ||
212 | date >$@ | ||
213 | |||
214 | clean: | ||
215 | $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 | ||
216 | $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info | ||
217 | $(RM) howto-index.txt howto/*.html doc.dep | ||
218 | $(RM) technical/api-*.html technical/api-index.txt | ||
219 | $(RM) $(cmds_txt) *.made | ||
220 | |||
221 | $(MAN_HTML): %.html : %.txt | ||
222 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | ||
223 | $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ | ||
224 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | ||
225 | mv $@+ $@ | ||
226 | |||
227 | %.1 %.5 %.7 : %.xml | ||
228 | $(QUIET_XMLTO)$(RM) $@ && \ | ||
229 | xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< | ||
230 | |||
231 | %.xml : %.txt | ||
232 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | ||
233 | $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ | ||
234 | $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ | ||
235 | mv $@+ $@ | ||
236 | |||
237 | XSLT = docbook.xsl | ||
238 | XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css | ||
239 | |||
240 | user-manual.html: user-manual.xml | ||
241 | $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< | ||
242 | |||
243 | perf.info: user-manual.texi | ||
244 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi | ||
245 | |||
246 | user-manual.texi: user-manual.xml | ||
247 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | ||
248 | $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ | ||
249 | $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ | ||
250 | rm $@++ && \ | ||
251 | mv $@+ $@ | ||
252 | |||
253 | user-manual.pdf: user-manual.xml | ||
254 | $(QUIET_DBLATEX)$(RM) $@+ $@ && \ | ||
255 | $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ | ||
256 | mv $@+ $@ | ||
257 | |||
258 | perfman.texi: $(MAN_XML) cat-texi.perl | ||
259 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | ||
260 | ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ | ||
261 | --to-stdout $(xml) &&) true) > $@++ && \ | ||
262 | $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ | ||
263 | rm $@++ && \ | ||
264 | mv $@+ $@ | ||
265 | |||
266 | perfman.info: perfman.texi | ||
267 | $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi | ||
268 | |||
269 | $(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml | ||
270 | $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ | ||
271 | $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ | ||
272 | mv $@+ $@ | ||
273 | |||
274 | howto-index.txt: howto-index.sh $(wildcard howto/*.txt) | ||
275 | $(QUIET_GEN)$(RM) $@+ $@ && \ | ||
276 | '$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \ | ||
277 | mv $@+ $@ | ||
278 | |||
279 | $(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt | ||
280 | $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt | ||
281 | |||
282 | WEBDOC_DEST = /pub/software/tools/perf/docs | ||
283 | |||
284 | $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt | ||
285 | $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ | ||
286 | sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ | ||
287 | mv $@+ $@ | ||
288 | |||
289 | install-webdoc : html | ||
290 | '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) | ||
291 | |||
292 | quick-install: quick-install-man | ||
293 | |||
294 | quick-install-man: | ||
295 | '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) | ||
296 | |||
297 | quick-install-html: | ||
298 | '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) | ||
299 | |||
300 | .PHONY: .FORCE-PERF-VERSION-FILE | ||
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf new file mode 100644 index 000000000000..356b23a40339 --- /dev/null +++ b/tools/perf/Documentation/asciidoc.conf | |||
@@ -0,0 +1,91 @@ | |||
1 | ## linkperf: macro | ||
2 | # | ||
3 | # Usage: linkperf:command[manpage-section] | ||
4 | # | ||
5 | # Note, {0} is the manpage section, while {target} is the command. | ||
6 | # | ||
7 | # Show PERF link as: <command>(<section>) if section is defined, else just show | ||
8 | # the command. | ||
9 | |||
10 | [macros] | ||
11 | (?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]= | ||
12 | |||
13 | [attributes] | ||
14 | asterisk=* | ||
15 | plus=+ | ||
16 | caret=^ | ||
17 | startsb=[ | ||
18 | endsb=] | ||
19 | tilde=~ | ||
20 | |||
21 | ifdef::backend-docbook[] | ||
22 | [linkperf-inlinemacro] | ||
23 | {0%{target}} | ||
24 | {0#<citerefentry>} | ||
25 | {0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>} | ||
26 | {0#</citerefentry>} | ||
27 | endif::backend-docbook[] | ||
28 | |||
29 | ifdef::backend-docbook[] | ||
30 | ifndef::perf-asciidoc-no-roff[] | ||
31 | # "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. | ||
32 | # v1.72 breaks with this because it replaces dots not in roff requests. | ||
33 | [listingblock] | ||
34 | <example><title>{title}</title> | ||
35 | <literallayout> | ||
36 | ifdef::doctype-manpage[] | ||
37 | .ft C | ||
38 | endif::doctype-manpage[] | ||
39 | | | ||
40 | ifdef::doctype-manpage[] | ||
41 | .ft | ||
42 | endif::doctype-manpage[] | ||
43 | </literallayout> | ||
44 | {title#}</example> | ||
45 | endif::perf-asciidoc-no-roff[] | ||
46 | |||
47 | ifdef::perf-asciidoc-no-roff[] | ||
48 | ifdef::doctype-manpage[] | ||
49 | # The following two small workarounds insert a simple paragraph after screen | ||
50 | [listingblock] | ||
51 | <example><title>{title}</title> | ||
52 | <literallayout> | ||
53 | | | ||
54 | </literallayout><simpara></simpara> | ||
55 | {title#}</example> | ||
56 | |||
57 | [verseblock] | ||
58 | <formalpara{id? id="{id}"}><title>{title}</title><para> | ||
59 | {title%}<literallayout{id? id="{id}"}> | ||
60 | {title#}<literallayout> | ||
61 | | | ||
62 | </literallayout> | ||
63 | {title#}</para></formalpara> | ||
64 | {title%}<simpara></simpara> | ||
65 | endif::doctype-manpage[] | ||
66 | endif::perf-asciidoc-no-roff[] | ||
67 | endif::backend-docbook[] | ||
68 | |||
69 | ifdef::doctype-manpage[] | ||
70 | ifdef::backend-docbook[] | ||
71 | [header] | ||
72 | template::[header-declarations] | ||
73 | <refentry> | ||
74 | <refmeta> | ||
75 | <refentrytitle>{mantitle}</refentrytitle> | ||
76 | <manvolnum>{manvolnum}</manvolnum> | ||
77 | <refmiscinfo class="source">perf</refmiscinfo> | ||
78 | <refmiscinfo class="version">{perf_version}</refmiscinfo> | ||
79 | <refmiscinfo class="manual">perf Manual</refmiscinfo> | ||
80 | </refmeta> | ||
81 | <refnamediv> | ||
82 | <refname>{manname}</refname> | ||
83 | <refpurpose>{manpurpose}</refpurpose> | ||
84 | </refnamediv> | ||
85 | endif::backend-docbook[] | ||
86 | endif::doctype-manpage[] | ||
87 | |||
88 | ifdef::backend-xhtml11[] | ||
89 | [linkperf-inlinemacro] | ||
90 | <a href="{target}.html">{target}{0?({0})}</a> | ||
91 | endif::backend-xhtml11[] | ||
diff --git a/tools/perf/Documentation/manpage-1.72.xsl b/tools/perf/Documentation/manpage-1.72.xsl new file mode 100644 index 000000000000..b4d315cb8c47 --- /dev/null +++ b/tools/perf/Documentation/manpage-1.72.xsl | |||
@@ -0,0 +1,14 @@ | |||
1 | <!-- manpage-1.72.xsl: | ||
2 | special settings for manpages rendered from asciidoc+docbook | ||
3 | handles peculiarities in docbook-xsl 1.72.0 --> | ||
4 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
5 | version="1.0"> | ||
6 | |||
7 | <xsl:import href="manpage-base.xsl"/> | ||
8 | |||
9 | <!-- these are the special values for the roff control characters | ||
10 | needed for docbook-xsl 1.72.0 --> | ||
11 | <xsl:param name="git.docbook.backslash">▓</xsl:param> | ||
12 | <xsl:param name="git.docbook.dot" >⌂</xsl:param> | ||
13 | |||
14 | </xsl:stylesheet> | ||
diff --git a/tools/perf/Documentation/manpage-base.xsl b/tools/perf/Documentation/manpage-base.xsl new file mode 100644 index 000000000000..a264fa616093 --- /dev/null +++ b/tools/perf/Documentation/manpage-base.xsl | |||
@@ -0,0 +1,35 @@ | |||
1 | <!-- manpage-base.xsl: | ||
2 | special formatting for manpages rendered from asciidoc+docbook --> | ||
3 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
4 | version="1.0"> | ||
5 | |||
6 | <!-- these params silence some output from xmlto --> | ||
7 | <xsl:param name="man.output.quietly" select="1"/> | ||
8 | <xsl:param name="refentry.meta.get.quietly" select="1"/> | ||
9 | |||
10 | <!-- convert asciidoc callouts to man page format; | ||
11 | git.docbook.backslash and git.docbook.dot params | ||
12 | must be supplied by another XSL file or other means --> | ||
13 | <xsl:template match="co"> | ||
14 | <xsl:value-of select="concat( | ||
15 | $git.docbook.backslash,'fB(', | ||
16 | substring-after(@id,'-'),')', | ||
17 | $git.docbook.backslash,'fR')"/> | ||
18 | </xsl:template> | ||
19 | <xsl:template match="calloutlist"> | ||
20 | <xsl:value-of select="$git.docbook.dot"/> | ||
21 | <xsl:text>sp </xsl:text> | ||
22 | <xsl:apply-templates/> | ||
23 | <xsl:text> </xsl:text> | ||
24 | </xsl:template> | ||
25 | <xsl:template match="callout"> | ||
26 | <xsl:value-of select="concat( | ||
27 | $git.docbook.backslash,'fB', | ||
28 | substring-after(@arearefs,'-'), | ||
29 | '. ',$git.docbook.backslash,'fR')"/> | ||
30 | <xsl:apply-templates/> | ||
31 | <xsl:value-of select="$git.docbook.dot"/> | ||
32 | <xsl:text>br </xsl:text> | ||
33 | </xsl:template> | ||
34 | |||
35 | </xsl:stylesheet> | ||
diff --git a/tools/perf/Documentation/manpage-bold-literal.xsl b/tools/perf/Documentation/manpage-bold-literal.xsl new file mode 100644 index 000000000000..608eb5df6281 --- /dev/null +++ b/tools/perf/Documentation/manpage-bold-literal.xsl | |||
@@ -0,0 +1,17 @@ | |||
1 | <!-- manpage-bold-literal.xsl: | ||
2 | special formatting for manpages rendered from asciidoc+docbook --> | ||
3 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
4 | version="1.0"> | ||
5 | |||
6 | <!-- render literal text as bold (instead of plain or monospace); | ||
7 | this makes literal text easier to distinguish in manpages | ||
8 | viewed on a tty --> | ||
9 | <xsl:template match="literal"> | ||
10 | <xsl:value-of select="$git.docbook.backslash"/> | ||
11 | <xsl:text>fB</xsl:text> | ||
12 | <xsl:apply-templates/> | ||
13 | <xsl:value-of select="$git.docbook.backslash"/> | ||
14 | <xsl:text>fR</xsl:text> | ||
15 | </xsl:template> | ||
16 | |||
17 | </xsl:stylesheet> | ||
diff --git a/tools/perf/Documentation/manpage-normal.xsl b/tools/perf/Documentation/manpage-normal.xsl new file mode 100644 index 000000000000..a48f5b11f3dc --- /dev/null +++ b/tools/perf/Documentation/manpage-normal.xsl | |||
@@ -0,0 +1,13 @@ | |||
1 | <!-- manpage-normal.xsl: | ||
2 | special settings for manpages rendered from asciidoc+docbook | ||
3 | handles anything we want to keep away from docbook-xsl 1.72.0 --> | ||
4 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
5 | version="1.0"> | ||
6 | |||
7 | <xsl:import href="manpage-base.xsl"/> | ||
8 | |||
9 | <!-- these are the normal values for the roff control characters --> | ||
10 | <xsl:param name="git.docbook.backslash">\</xsl:param> | ||
11 | <xsl:param name="git.docbook.dot" >.</xsl:param> | ||
12 | |||
13 | </xsl:stylesheet> | ||
diff --git a/tools/perf/Documentation/manpage-suppress-sp.xsl b/tools/perf/Documentation/manpage-suppress-sp.xsl new file mode 100644 index 000000000000..a63c7632a87d --- /dev/null +++ b/tools/perf/Documentation/manpage-suppress-sp.xsl | |||
@@ -0,0 +1,21 @@ | |||
1 | <!-- manpage-suppress-sp.xsl: | ||
2 | special settings for manpages rendered from asciidoc+docbook | ||
3 | handles erroneous, inline .sp in manpage output of some | ||
4 | versions of docbook-xsl --> | ||
5 | <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" | ||
6 | version="1.0"> | ||
7 | |||
8 | <!-- attempt to work around spurious .sp at the tail of the line | ||
9 | that some versions of docbook stylesheets seem to add --> | ||
10 | <xsl:template match="simpara"> | ||
11 | <xsl:variable name="content"> | ||
12 | <xsl:apply-templates/> | ||
13 | </xsl:variable> | ||
14 | <xsl:value-of select="normalize-space($content)"/> | ||
15 | <xsl:if test="not(ancestor::authorblurb) and | ||
16 | not(ancestor::personblurb)"> | ||
17 | <xsl:text> </xsl:text> | ||
18 | </xsl:if> | ||
19 | </xsl:template> | ||
20 | |||
21 | </xsl:stylesheet> | ||
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt new file mode 100644 index 000000000000..c9dcade06831 --- /dev/null +++ b/tools/perf/Documentation/perf-annotate.txt | |||
@@ -0,0 +1,29 @@ | |||
1 | perf-annotate(1) | ||
2 | ================ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-annotate - Read perf.data (created by perf record) and display annotated code | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf annotate' [-i <file> | --input=file] symbol_name | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command reads the input file and displays an annotated version of the | ||
16 | code. If the object file has debug symbols then the source code will be | ||
17 | displayed alongside assembly code. | ||
18 | |||
19 | If there is no debug info in the object, then annotated assembly is displayed. | ||
20 | |||
21 | OPTIONS | ||
22 | ------- | ||
23 | -i:: | ||
24 | --input=:: | ||
25 | Input file name. (default: perf.data) | ||
26 | |||
27 | SEE ALSO | ||
28 | -------- | ||
29 | linkperf:perf-record[1] | ||
diff --git a/tools/perf/Documentation/perf-help.txt b/tools/perf/Documentation/perf-help.txt new file mode 100644 index 000000000000..514391818d1f --- /dev/null +++ b/tools/perf/Documentation/perf-help.txt | |||
@@ -0,0 +1,38 @@ | |||
1 | perf-help(1) | ||
2 | ============ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-help - display help information about perf | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | 'perf help' [-a|--all] [COMMAND] | ||
11 | |||
12 | DESCRIPTION | ||
13 | ----------- | ||
14 | |||
15 | With no options and no COMMAND given, the synopsis of the 'perf' | ||
16 | command and a list of the most commonly used perf commands are printed | ||
17 | on the standard output. | ||
18 | |||
19 | If the option '--all' or '-a' is given, then all available commands are | ||
20 | printed on the standard output. | ||
21 | |||
22 | If a perf command is named, a manual page for that command is brought | ||
23 | up. The 'man' program is used by default for this purpose, but this | ||
24 | can be overridden by other options or configuration variables. | ||
25 | |||
26 | Note that `perf --help ...` is identical to `perf help ...` because the | ||
27 | former is internally converted into the latter. | ||
28 | |||
29 | OPTIONS | ||
30 | ------- | ||
31 | -a:: | ||
32 | --all:: | ||
33 | Prints all the available commands on the standard output. This | ||
34 | option supersedes any other option. | ||
35 | |||
36 | PERF | ||
37 | ---- | ||
38 | Part of the linkperf:perf[1] suite | ||
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt new file mode 100644 index 000000000000..8290b9422668 --- /dev/null +++ b/tools/perf/Documentation/perf-list.txt | |||
@@ -0,0 +1,25 @@ | |||
1 | perf-list(1) | ||
2 | ============ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-list - List all symbolic event types | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf list' | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command displays the symbolic event types which can be selected in the | ||
16 | various perf commands with the -e option. | ||
17 | |||
18 | OPTIONS | ||
19 | ------- | ||
20 | None | ||
21 | |||
22 | SEE ALSO | ||
23 | -------- | ||
24 | linkperf:perf-stat[1], linkperf:perf-top[1], | ||
25 | linkperf:perf-record[1] | ||
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt new file mode 100644 index 000000000000..1dbc1eeb4c01 --- /dev/null +++ b/tools/perf/Documentation/perf-record.txt | |||
@@ -0,0 +1,42 @@ | |||
1 | perf-record(1) | ||
2 | ============== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-record - Run a command and record its profile into perf.data | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | 'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] | ||
13 | |||
14 | DESCRIPTION | ||
15 | ----------- | ||
16 | This command runs a command and gathers a performance counter profile | ||
17 | from it, into perf.data - without displaying anything. | ||
18 | |||
19 | This file can then be inspected later on, using 'perf report'. | ||
20 | |||
21 | |||
22 | OPTIONS | ||
23 | ------- | ||
24 | <command>...:: | ||
25 | Any command you can specify in a shell. | ||
26 | |||
27 | -e:: | ||
28 | --event=:: | ||
29 | Select the PMU event. Selection can be a symbolic event name | ||
30 | (use 'perf list' to list all events) or a raw PMU | ||
31 | event (eventsel+umask) in the form of rNNN where NNN is a | ||
32 | hexadecimal event descriptor. | ||
33 | |||
34 | -a:: | ||
35 | system-wide collection | ||
36 | |||
37 | -l:: | ||
38 | scale counter values | ||
39 | |||
40 | SEE ALSO | ||
41 | -------- | ||
42 | linkperf:perf-stat[1], linkperf:perf-list[1] | ||
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt new file mode 100644 index 000000000000..52d3fc6846a9 --- /dev/null +++ b/tools/perf/Documentation/perf-report.txt | |||
@@ -0,0 +1,26 @@ | |||
1 | perf-report(1) | ||
2 | ============== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-report - Read perf.data (created by perf record) and display the profile | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf report' [-i <file> | --input=file] | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command displays the performance counter profile information recorded | ||
16 | via perf record. | ||
17 | |||
18 | OPTIONS | ||
19 | ------- | ||
20 | -i:: | ||
21 | --input=:: | ||
22 | Input file name. (default: perf.data) | ||
23 | |||
24 | SEE ALSO | ||
25 | -------- | ||
26 | linkperf:perf-stat[1] | ||
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt new file mode 100644 index 000000000000..c368a72721d7 --- /dev/null +++ b/tools/perf/Documentation/perf-stat.txt | |||
@@ -0,0 +1,66 @@ | |||
1 | perf-stat(1) | ||
2 | ============ | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-stat - Run a command and gather performance counter statistics | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | 'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] | ||
13 | |||
14 | DESCRIPTION | ||
15 | ----------- | ||
16 | This command runs a command and gathers performance counter statistics | ||
17 | from it. | ||
18 | |||
19 | |||
20 | OPTIONS | ||
21 | ------- | ||
22 | <command>...:: | ||
23 | Any command you can specify in a shell. | ||
24 | |||
25 | |||
26 | -e:: | ||
27 | --event=:: | ||
28 | Select the PMU event. Selection can be a symbolic event name | ||
29 | (use 'perf list' to list all events) or a raw PMU | ||
30 | event (eventsel+umask) in the form of rNNN where NNN is a | ||
31 | hexadecimal event descriptor. | ||
32 | |||
33 | -i:: | ||
34 | --inherit:: | ||
35 | child tasks inherit counters | ||
36 | -p:: | ||
37 | --pid=<pid>:: | ||
38 | stat events on existing pid | ||
39 | |||
40 | -a:: | ||
41 | system-wide collection | ||
42 | |||
43 | -l:: | ||
44 | scale counter values | ||
45 | |||
46 | EXAMPLES | ||
47 | -------- | ||
48 | |||
49 | $ perf stat -- make -j | ||
50 | |||
51 | Performance counter stats for 'make -j': | ||
52 | |||
53 | 8117.370256 task clock ticks # 11.281 CPU utilization factor | ||
54 | 678 context switches # 0.000 M/sec | ||
55 | 133 CPU migrations # 0.000 M/sec | ||
56 | 235724 pagefaults # 0.029 M/sec | ||
57 | 24821162526 CPU cycles # 3057.784 M/sec | ||
58 | 18687303457 instructions # 2302.138 M/sec | ||
59 | 172158895 cache references # 21.209 M/sec | ||
60 | 27075259 cache misses # 3.335 M/sec | ||
61 | |||
62 | Wall-clock time elapsed: 719.554352 msecs | ||
63 | |||
64 | SEE ALSO | ||
65 | -------- | ||
66 | linkperf:perf-top[1], linkperf:perf-list[1] | ||
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt new file mode 100644 index 000000000000..539d01289725 --- /dev/null +++ b/tools/perf/Documentation/perf-top.txt | |||
@@ -0,0 +1,39 @@ | |||
1 | perf-top(1) | ||
2 | =========== | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf-top - Run a command and profile it | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | This command runs a command and gathers a performance counter profile | ||
16 | from it. | ||
17 | |||
18 | |||
19 | OPTIONS | ||
20 | ------- | ||
21 | <command>...:: | ||
22 | Any command you can specify in a shell. | ||
23 | |||
24 | -e:: | ||
25 | --event=:: | ||
26 | Select the PMU event. Selection can be a symbolic event name | ||
27 | (use 'perf list' to list all events) or a raw PMU | ||
28 | event (eventsel+umask) in the form of rNNN where NNN is a | ||
29 | hexadecimal event descriptor. | ||
30 | |||
31 | -a:: | ||
32 | system-wide collection | ||
33 | |||
34 | -l:: | ||
35 | scale counter values | ||
36 | |||
37 | SEE ALSO | ||
38 | -------- | ||
39 | linkperf:perf-stat[1], linkperf:perf-list[1] | ||
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt new file mode 100644 index 000000000000..69c832557199 --- /dev/null +++ b/tools/perf/Documentation/perf.txt | |||
@@ -0,0 +1,24 @@ | |||
1 | perf(1) | ||
2 | ======= | ||
3 | |||
4 | NAME | ||
5 | ---- | ||
6 | perf - Performance analysis tools for Linux | ||
7 | |||
8 | SYNOPSIS | ||
9 | -------- | ||
10 | [verse] | ||
11 | 'perf' [--version] [--help] COMMAND [ARGS] | ||
12 | |||
13 | DESCRIPTION | ||
14 | ----------- | ||
15 | Performance counters for Linux are a new kernel-based subsystem | ||
16 | that provides a framework for all things performance analysis. It | ||
17 | covers hardware level (CPU/PMU, Performance Monitoring Unit) features | ||
18 | and software features (software counters, tracepoints) as well. | ||
19 | |||
20 | SEE ALSO | ||
21 | -------- | ||
22 | linkperf:perf-stat[1], linkperf:perf-top[1], | ||
23 | linkperf:perf-record[1], linkperf:perf-report[1], | ||
24 | linkperf:perf-list[1] | ||
diff --git a/tools/perf/Makefile b/tools/perf/Makefile new file mode 100644 index 000000000000..0cbd5d6874ec --- /dev/null +++ b/tools/perf/Makefile | |||
@@ -0,0 +1,929 @@ | |||
1 | # The default target of this Makefile is... | ||
2 | all:: | ||
3 | |||
4 | # Define V=1 to have a more verbose compile. | ||
5 | # | ||
6 | # Define SNPRINTF_RETURNS_BOGUS if you are on a system where snprintf() | ||
7 | # or vsnprintf() return -1 instead of number of characters which would | ||
8 | # have been written to the final string if enough space had been available. | ||
9 | # | ||
10 | # Define FREAD_READS_DIRECTORIES if you are on a system which succeeds | ||
11 | # when attempting to read from an fopen'ed directory. | ||
12 | # | ||
13 | # Define NO_OPENSSL environment variable if you do not have OpenSSL. | ||
14 | # This also implies MOZILLA_SHA1. | ||
15 | # | ||
16 | # Define CURLDIR=/foo/bar if your curl header and library files are in | ||
17 | # /foo/bar/include and /foo/bar/lib directories. | ||
18 | # | ||
19 | # Define EXPATDIR=/foo/bar if your expat header and library files are in | ||
20 | # /foo/bar/include and /foo/bar/lib directories. | ||
21 | # | ||
22 | # Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. | ||
23 | # | ||
24 | # Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks | ||
25 | # d_type in struct dirent (latest Cygwin -- will be fixed soonish). | ||
26 | # | ||
27 | # Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) | ||
28 | # do not support the 'size specifiers' introduced by C99, namely ll, hh, | ||
29 | # j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t). | ||
30 | # some C compilers supported these specifiers prior to C99 as an extension. | ||
31 | # | ||
32 | # Define NO_STRCASESTR if you don't have strcasestr. | ||
33 | # | ||
34 | # Define NO_MEMMEM if you don't have memmem. | ||
35 | # | ||
36 | # Define NO_STRTOUMAX if you don't have strtoumax in the C library. | ||
37 | # If your compiler also does not support long long or does not have | ||
38 | # strtoull, define NO_STRTOULL. | ||
39 | # | ||
40 | # Define NO_SETENV if you don't have setenv in the C library. | ||
41 | # | ||
42 | # Define NO_UNSETENV if you don't have unsetenv in the C library. | ||
43 | # | ||
44 | # Define NO_MKDTEMP if you don't have mkdtemp in the C library. | ||
45 | # | ||
46 | # Define NO_SYS_SELECT_H if you don't have sys/select.h. | ||
47 | # | ||
48 | # Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. | ||
49 | # Enable it on Windows. By default, symrefs are still used. | ||
50 | # | ||
51 | # Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability | ||
52 | # tests. These tests take up a significant amount of the total test time | ||
53 | # but are not needed unless you plan to talk to SVN repos. | ||
54 | # | ||
55 | # Define NO_FINK if you are building on Darwin/Mac OS X, have Fink | ||
56 | # installed in /sw, but don't want PERF to link against any libraries | ||
57 | # installed there. If defined you may specify your own (or Fink's) | ||
58 | # include directories and library directories by defining CFLAGS | ||
59 | # and LDFLAGS appropriately. | ||
60 | # | ||
61 | # Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, | ||
62 | # have DarwinPorts installed in /opt/local, but don't want PERF to | ||
63 | # link against any libraries installed there. If defined you may | ||
64 | # specify your own (or DarwinPort's) include directories and | ||
65 | # library directories by defining CFLAGS and LDFLAGS appropriately. | ||
66 | # | ||
67 | # Define PPC_SHA1 environment variable when running make to make use of | ||
68 | # a bundled SHA1 routine optimized for PowerPC. | ||
69 | # | ||
70 | # Define ARM_SHA1 environment variable when running make to make use of | ||
71 | # a bundled SHA1 routine optimized for ARM. | ||
72 | # | ||
73 | # Define MOZILLA_SHA1 environment variable when running make to make use of | ||
74 | # a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast | ||
75 | # on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default | ||
76 | # choice) has very fast version optimized for i586. | ||
77 | # | ||
78 | # Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). | ||
79 | # | ||
80 | # Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). | ||
81 | # | ||
82 | # Define NEEDS_SOCKET if linking with libc is not enough (SunOS, | ||
83 | # Patrick Mauritz). | ||
84 | # | ||
85 | # Define NO_MMAP if you want to avoid mmap. | ||
86 | # | ||
87 | # Define NO_PTHREADS if you do not have or do not want to use Pthreads. | ||
88 | # | ||
89 | # Define NO_PREAD if you have a problem with pread() system call (e.g. | ||
90 | # cygwin.dll before v1.5.22). | ||
91 | # | ||
92 | # Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is | ||
93 | # generally faster on your platform than accessing the working directory. | ||
94 | # | ||
95 | # Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support | ||
96 | # the executable mode bit, but doesn't really do so. | ||
97 | # | ||
98 | # Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). | ||
99 | # | ||
100 | # Define NO_SOCKADDR_STORAGE if your platform does not have struct | ||
101 | # sockaddr_storage. | ||
102 | # | ||
103 | # Define NO_ICONV if your libc does not properly support iconv. | ||
104 | # | ||
105 | # Define OLD_ICONV if your library has an old iconv(), where the second | ||
106 | # (input buffer pointer) parameter is declared with type (const char **). | ||
107 | # | ||
108 | # Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. | ||
109 | # | ||
110 | # Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" | ||
111 | # that tells runtime paths to dynamic libraries; | ||
112 | # "-Wl,-rpath=/path/lib" is used instead. | ||
113 | # | ||
114 | # Define USE_NSEC below if you want perf to care about sub-second file mtimes | ||
115 | # and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and | ||
116 | # it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely | ||
117 | # randomly break unless your underlying filesystem supports those sub-second | ||
118 | # times (my ext3 doesn't). | ||
119 | # | ||
120 | # Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of | ||
121 | # "st_ctim" | ||
122 | # | ||
123 | # Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" | ||
124 | # available. This automatically turns USE_NSEC off. | ||
125 | # | ||
126 | # Define USE_STDEV below if you want perf to care about the underlying device | ||
127 | # change being considered an inode change from the update-index perspective. | ||
128 | # | ||
129 | # Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks | ||
130 | # field that counts the on-disk footprint in 512-byte blocks. | ||
131 | # | ||
132 | # Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 | ||
133 | # | ||
134 | # Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. | ||
135 | # | ||
136 | # Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's | ||
137 | # MakeMaker (e.g. using ActiveState under Cygwin). | ||
138 | # | ||
139 | # Define NO_PERL if you do not want Perl scripts or libraries at all. | ||
140 | # | ||
141 | # Define INTERNAL_QSORT to use Git's implementation of qsort(), which | ||
142 | # is a simplified version of the merge sort used in glibc. This is | ||
143 | # recommended if Git triggers O(n^2) behavior in your platform's qsort(). | ||
144 | # | ||
145 | # Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call | ||
146 | # your external grep (e.g., if your system lacks grep, if its grep is | ||
147 | # broken, or spawning external process is slower than built-in grep perf has). | ||
148 | |||
149 | PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE | ||
150 | @$(SHELL_PATH) util/PERF-VERSION-GEN | ||
151 | -include PERF-VERSION-FILE | ||
152 | |||
153 | uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') | ||
154 | uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') | ||
155 | uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') | ||
156 | uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') | ||
157 | uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') | ||
158 | uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') | ||
159 | |||
160 | # CFLAGS and LDFLAGS are for the users to override from the command line. | ||
161 | |||
162 | CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 | ||
163 | LDFLAGS = -lpthread -lrt -lelf | ||
164 | ALL_CFLAGS = $(CFLAGS) | ||
165 | ALL_LDFLAGS = $(LDFLAGS) | ||
166 | STRIP ?= strip | ||
167 | |||
168 | # Among the variables below, these: | ||
169 | # perfexecdir | ||
170 | # template_dir | ||
171 | # mandir | ||
172 | # infodir | ||
173 | # htmldir | ||
174 | # ETC_PERFCONFIG (but not sysconfdir) | ||
175 | # can be specified as a relative path some/where/else; | ||
176 | # this is interpreted as relative to $(prefix) and "perf" at | ||
177 | # runtime figures out where they are based on the path to the executable. | ||
178 | # This can help installing the suite in a relocatable way. | ||
179 | |||
180 | prefix = $(HOME) | ||
181 | bindir_relative = bin | ||
182 | bindir = $(prefix)/$(bindir_relative) | ||
183 | mandir = share/man | ||
184 | infodir = share/info | ||
185 | perfexecdir = libexec/perf-core | ||
186 | sharedir = $(prefix)/share | ||
187 | template_dir = share/perf-core/templates | ||
188 | htmldir = share/doc/perf-doc | ||
189 | ifeq ($(prefix),/usr) | ||
190 | sysconfdir = /etc | ||
191 | ETC_PERFCONFIG = $(sysconfdir)/perfconfig | ||
192 | else | ||
193 | sysconfdir = $(prefix)/etc | ||
194 | ETC_PERFCONFIG = etc/perfconfig | ||
195 | endif | ||
196 | lib = lib | ||
197 | # DESTDIR= | ||
198 | |||
199 | export prefix bindir sharedir sysconfdir | ||
200 | |||
201 | CC = gcc | ||
202 | AR = ar | ||
203 | RM = rm -f | ||
204 | TAR = tar | ||
205 | FIND = find | ||
206 | INSTALL = install | ||
207 | RPMBUILD = rpmbuild | ||
208 | PTHREAD_LIBS = -lpthread | ||
209 | |||
210 | # sparse is architecture-neutral, which means that we need to tell it | ||
211 | # explicitly what architecture to check for. Fix this up for yours.. | ||
212 | SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ | ||
213 | |||
214 | |||
215 | |||
216 | ### --- END CONFIGURATION SECTION --- | ||
217 | |||
218 | # Those must not be GNU-specific; they are shared with perl/ which may | ||
219 | # be built by a different compiler. (Note that this is an artifact now | ||
220 | # but it still might be nice to keep that distinction.) | ||
221 | BASIC_CFLAGS = | ||
222 | BASIC_LDFLAGS = | ||
223 | |||
224 | # Guard against environment variables | ||
225 | BUILTIN_OBJS = | ||
226 | BUILT_INS = | ||
227 | COMPAT_CFLAGS = | ||
228 | COMPAT_OBJS = | ||
229 | LIB_H = | ||
230 | LIB_OBJS = | ||
231 | SCRIPT_PERL = | ||
232 | SCRIPT_SH = | ||
233 | TEST_PROGRAMS = | ||
234 | |||
235 | # | ||
236 | # No scripts right now: | ||
237 | # | ||
238 | |||
239 | # SCRIPT_SH += perf-am.sh | ||
240 | |||
241 | # | ||
242 | # No Perl scripts right now: | ||
243 | # | ||
244 | |||
245 | # SCRIPT_PERL += perf-add--interactive.perl | ||
246 | |||
247 | SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ | ||
248 | $(patsubst %.perl,%,$(SCRIPT_PERL)) | ||
249 | |||
250 | # Empty... | ||
251 | EXTRA_PROGRAMS = | ||
252 | |||
253 | # ... and all the rest that could be moved out of bindir to perfexecdir | ||
254 | PROGRAMS += $(EXTRA_PROGRAMS) | ||
255 | |||
256 | # | ||
257 | # Single 'perf' binary right now: | ||
258 | # | ||
259 | PROGRAMS += perf | ||
260 | |||
261 | # List built-in command $C whose implementation cmd_$C() is not in | ||
262 | # builtin-$C.o but is linked in as part of some other command. | ||
263 | # | ||
264 | # None right now: | ||
265 | # | ||
266 | # BUILT_INS += perf-init $X | ||
267 | |||
268 | # what 'all' will build and 'install' will install, in perfexecdir | ||
269 | ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) | ||
270 | |||
271 | # what 'all' will build but not install in perfexecdir | ||
272 | OTHER_PROGRAMS = perf$X | ||
273 | |||
274 | # Set paths to tools early so that they can be used for version tests. | ||
275 | ifndef SHELL_PATH | ||
276 | SHELL_PATH = /bin/sh | ||
277 | endif | ||
278 | ifndef PERL_PATH | ||
279 | PERL_PATH = /usr/bin/perl | ||
280 | endif | ||
281 | |||
282 | export PERL_PATH | ||
283 | |||
284 | LIB_FILE=libperf.a | ||
285 | |||
286 | LIB_H += ../../include/linux/perf_counter.h | ||
287 | LIB_H += perf.h | ||
288 | LIB_H += util/list.h | ||
289 | LIB_H += util/rbtree.h | ||
290 | LIB_H += util/levenshtein.h | ||
291 | LIB_H += util/parse-options.h | ||
292 | LIB_H += util/parse-events.h | ||
293 | LIB_H += util/quote.h | ||
294 | LIB_H += util/util.h | ||
295 | LIB_H += util/help.h | ||
296 | LIB_H += util/strbuf.h | ||
297 | LIB_H += util/string.h | ||
298 | LIB_H += util/run-command.h | ||
299 | LIB_H += util/sigchain.h | ||
300 | LIB_H += util/symbol.h | ||
301 | LIB_H += util/color.h | ||
302 | |||
303 | LIB_OBJS += util/abspath.o | ||
304 | LIB_OBJS += util/alias.o | ||
305 | LIB_OBJS += util/config.o | ||
306 | LIB_OBJS += util/ctype.o | ||
307 | LIB_OBJS += util/environment.o | ||
308 | LIB_OBJS += util/exec_cmd.o | ||
309 | LIB_OBJS += util/help.o | ||
310 | LIB_OBJS += util/levenshtein.o | ||
311 | LIB_OBJS += util/parse-options.o | ||
312 | LIB_OBJS += util/parse-events.o | ||
313 | LIB_OBJS += util/path.o | ||
314 | LIB_OBJS += util/rbtree.o | ||
315 | LIB_OBJS += util/run-command.o | ||
316 | LIB_OBJS += util/quote.o | ||
317 | LIB_OBJS += util/strbuf.o | ||
318 | LIB_OBJS += util/string.o | ||
319 | LIB_OBJS += util/usage.o | ||
320 | LIB_OBJS += util/wrapper.o | ||
321 | LIB_OBJS += util/sigchain.o | ||
322 | LIB_OBJS += util/symbol.o | ||
323 | LIB_OBJS += util/color.o | ||
324 | LIB_OBJS += util/pager.o | ||
325 | |||
326 | BUILTIN_OBJS += builtin-annotate.o | ||
327 | BUILTIN_OBJS += builtin-help.o | ||
328 | BUILTIN_OBJS += builtin-list.o | ||
329 | BUILTIN_OBJS += builtin-record.o | ||
330 | BUILTIN_OBJS += builtin-report.o | ||
331 | BUILTIN_OBJS += builtin-stat.o | ||
332 | BUILTIN_OBJS += builtin-top.o | ||
333 | |||
334 | PERFLIBS = $(LIB_FILE) | ||
335 | EXTLIBS = | ||
336 | |||
337 | # | ||
338 | # Platform specific tweaks | ||
339 | # | ||
340 | |||
341 | # We choose to avoid "if .. else if .. else .. endif endif" | ||
342 | # because maintaining the nesting to match is a pain. If | ||
343 | # we had "elif" things would have been much nicer... | ||
344 | |||
345 | -include config.mak.autogen | ||
346 | -include config.mak | ||
347 | |||
348 | ifeq ($(uname_S),Darwin) | ||
349 | ifndef NO_FINK | ||
350 | ifeq ($(shell test -d /sw/lib && echo y),y) | ||
351 | BASIC_CFLAGS += -I/sw/include | ||
352 | BASIC_LDFLAGS += -L/sw/lib | ||
353 | endif | ||
354 | endif | ||
355 | ifndef NO_DARWIN_PORTS | ||
356 | ifeq ($(shell test -d /opt/local/lib && echo y),y) | ||
357 | BASIC_CFLAGS += -I/opt/local/include | ||
358 | BASIC_LDFLAGS += -L/opt/local/lib | ||
359 | endif | ||
360 | endif | ||
361 | PTHREAD_LIBS = | ||
362 | endif | ||
363 | |||
364 | ifndef CC_LD_DYNPATH | ||
365 | ifdef NO_R_TO_GCC_LINKER | ||
366 | # Some gcc does not accept and pass -R to the linker to specify | ||
367 | # the runtime dynamic library path. | ||
368 | CC_LD_DYNPATH = -Wl,-rpath, | ||
369 | else | ||
370 | CC_LD_DYNPATH = -R | ||
371 | endif | ||
372 | endif | ||
373 | |||
374 | ifdef ZLIB_PATH | ||
375 | BASIC_CFLAGS += -I$(ZLIB_PATH)/include | ||
376 | EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) | ||
377 | endif | ||
378 | EXTLIBS += -lz | ||
379 | |||
380 | ifdef NEEDS_SOCKET | ||
381 | EXTLIBS += -lsocket | ||
382 | endif | ||
383 | ifdef NEEDS_NSL | ||
384 | EXTLIBS += -lnsl | ||
385 | endif | ||
386 | ifdef NO_D_TYPE_IN_DIRENT | ||
387 | BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT | ||
388 | endif | ||
389 | ifdef NO_D_INO_IN_DIRENT | ||
390 | BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT | ||
391 | endif | ||
392 | ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
393 | BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT | ||
394 | endif | ||
395 | ifdef USE_NSEC | ||
396 | BASIC_CFLAGS += -DUSE_NSEC | ||
397 | endif | ||
398 | ifdef USE_ST_TIMESPEC | ||
399 | BASIC_CFLAGS += -DUSE_ST_TIMESPEC | ||
400 | endif | ||
401 | ifdef NO_NSEC | ||
402 | BASIC_CFLAGS += -DNO_NSEC | ||
403 | endif | ||
404 | ifdef NO_C99_FORMAT | ||
405 | BASIC_CFLAGS += -DNO_C99_FORMAT | ||
406 | endif | ||
407 | ifdef SNPRINTF_RETURNS_BOGUS | ||
408 | COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS | ||
409 | COMPAT_OBJS += compat/snprintf.o | ||
410 | endif | ||
411 | ifdef FREAD_READS_DIRECTORIES | ||
412 | COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES | ||
413 | COMPAT_OBJS += compat/fopen.o | ||
414 | endif | ||
415 | ifdef NO_SYMLINK_HEAD | ||
416 | BASIC_CFLAGS += -DNO_SYMLINK_HEAD | ||
417 | endif | ||
418 | ifdef NO_STRCASESTR | ||
419 | COMPAT_CFLAGS += -DNO_STRCASESTR | ||
420 | COMPAT_OBJS += compat/strcasestr.o | ||
421 | endif | ||
422 | ifdef NO_STRTOUMAX | ||
423 | COMPAT_CFLAGS += -DNO_STRTOUMAX | ||
424 | COMPAT_OBJS += compat/strtoumax.o | ||
425 | endif | ||
426 | ifdef NO_STRTOULL | ||
427 | COMPAT_CFLAGS += -DNO_STRTOULL | ||
428 | endif | ||
429 | ifdef NO_SETENV | ||
430 | COMPAT_CFLAGS += -DNO_SETENV | ||
431 | COMPAT_OBJS += compat/setenv.o | ||
432 | endif | ||
433 | ifdef NO_MKDTEMP | ||
434 | COMPAT_CFLAGS += -DNO_MKDTEMP | ||
435 | COMPAT_OBJS += compat/mkdtemp.o | ||
436 | endif | ||
437 | ifdef NO_UNSETENV | ||
438 | COMPAT_CFLAGS += -DNO_UNSETENV | ||
439 | COMPAT_OBJS += compat/unsetenv.o | ||
440 | endif | ||
441 | ifdef NO_SYS_SELECT_H | ||
442 | BASIC_CFLAGS += -DNO_SYS_SELECT_H | ||
443 | endif | ||
444 | ifdef NO_MMAP | ||
445 | COMPAT_CFLAGS += -DNO_MMAP | ||
446 | COMPAT_OBJS += compat/mmap.o | ||
447 | else | ||
448 | ifdef USE_WIN32_MMAP | ||
449 | COMPAT_CFLAGS += -DUSE_WIN32_MMAP | ||
450 | COMPAT_OBJS += compat/win32mmap.o | ||
451 | endif | ||
452 | endif | ||
453 | ifdef NO_PREAD | ||
454 | COMPAT_CFLAGS += -DNO_PREAD | ||
455 | COMPAT_OBJS += compat/pread.o | ||
456 | endif | ||
457 | ifdef NO_FAST_WORKING_DIRECTORY | ||
458 | BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY | ||
459 | endif | ||
460 | ifdef NO_TRUSTABLE_FILEMODE | ||
461 | BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE | ||
462 | endif | ||
463 | ifdef NO_IPV6 | ||
464 | BASIC_CFLAGS += -DNO_IPV6 | ||
465 | endif | ||
466 | ifdef NO_UINTMAX_T | ||
467 | BASIC_CFLAGS += -Duintmax_t=uint32_t | ||
468 | endif | ||
469 | ifdef NO_SOCKADDR_STORAGE | ||
470 | ifdef NO_IPV6 | ||
471 | BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in | ||
472 | else | ||
473 | BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6 | ||
474 | endif | ||
475 | endif | ||
476 | ifdef NO_INET_NTOP | ||
477 | LIB_OBJS += compat/inet_ntop.o | ||
478 | endif | ||
479 | ifdef NO_INET_PTON | ||
480 | LIB_OBJS += compat/inet_pton.o | ||
481 | endif | ||
482 | |||
483 | ifdef NO_ICONV | ||
484 | BASIC_CFLAGS += -DNO_ICONV | ||
485 | endif | ||
486 | |||
487 | ifdef OLD_ICONV | ||
488 | BASIC_CFLAGS += -DOLD_ICONV | ||
489 | endif | ||
490 | |||
491 | ifdef NO_DEFLATE_BOUND | ||
492 | BASIC_CFLAGS += -DNO_DEFLATE_BOUND | ||
493 | endif | ||
494 | |||
495 | ifdef PPC_SHA1 | ||
496 | SHA1_HEADER = "ppc/sha1.h" | ||
497 | LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o | ||
498 | else | ||
499 | ifdef ARM_SHA1 | ||
500 | SHA1_HEADER = "arm/sha1.h" | ||
501 | LIB_OBJS += arm/sha1.o arm/sha1_arm.o | ||
502 | else | ||
503 | ifdef MOZILLA_SHA1 | ||
504 | SHA1_HEADER = "mozilla-sha1/sha1.h" | ||
505 | LIB_OBJS += mozilla-sha1/sha1.o | ||
506 | else | ||
507 | SHA1_HEADER = <openssl/sha.h> | ||
508 | EXTLIBS += $(LIB_4_CRYPTO) | ||
509 | endif | ||
510 | endif | ||
511 | endif | ||
512 | ifdef NO_PERL_MAKEMAKER | ||
513 | export NO_PERL_MAKEMAKER | ||
514 | endif | ||
515 | ifdef NO_HSTRERROR | ||
516 | COMPAT_CFLAGS += -DNO_HSTRERROR | ||
517 | COMPAT_OBJS += compat/hstrerror.o | ||
518 | endif | ||
519 | ifdef NO_MEMMEM | ||
520 | COMPAT_CFLAGS += -DNO_MEMMEM | ||
521 | COMPAT_OBJS += compat/memmem.o | ||
522 | endif | ||
523 | ifdef INTERNAL_QSORT | ||
524 | COMPAT_CFLAGS += -DINTERNAL_QSORT | ||
525 | COMPAT_OBJS += compat/qsort.o | ||
526 | endif | ||
527 | ifdef RUNTIME_PREFIX | ||
528 | COMPAT_CFLAGS += -DRUNTIME_PREFIX | ||
529 | endif | ||
530 | |||
531 | ifdef DIR_HAS_BSD_GROUP_SEMANTICS | ||
532 | COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS | ||
533 | endif | ||
534 | ifdef NO_EXTERNAL_GREP | ||
535 | BASIC_CFLAGS += -DNO_EXTERNAL_GREP | ||
536 | endif | ||
537 | |||
538 | ifeq ($(PERL_PATH),) | ||
539 | NO_PERL=NoThanks | ||
540 | endif | ||
541 | |||
542 | QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir | ||
543 | QUIET_SUBDIR1 = | ||
544 | |||
545 | ifneq ($(findstring w,$(firstword -$(MAKEFLAGS))),w) # look for 'w' among the single-letter flags (first word of MAKEFLAGS) | ||
546 | PRINT_DIR = --no-print-directory | ||
547 | else # "make -w" | ||
548 | NO_SUBDIR = : | ||
549 | endif | ||
550 | |||
551 | ifneq ($(findstring s,$(firstword -$(MAKEFLAGS))),s) # not "make -s": look for 's' among the single-letter flags | ||
552 | ifndef V | ||
553 | QUIET_CC = @echo ' ' CC $@; | ||
554 | QUIET_AR = @echo ' ' AR $@; | ||
555 | QUIET_LINK = @echo ' ' LINK $@; | ||
556 | QUIET_BUILT_IN = @echo ' ' BUILTIN $@; | ||
557 | QUIET_GEN = @echo ' ' GEN $@; | ||
558 | QUIET_SUBDIR0 = +@subdir= | ||
559 | QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ | ||
560 | $(MAKE) $(PRINT_DIR) -C $$subdir | ||
561 | export V | ||
562 | export QUIET_GEN | ||
563 | export QUIET_BUILT_IN | ||
564 | endif | ||
565 | endif | ||
566 | |||
567 | ifdef ASCIIDOC8 | ||
568 | export ASCIIDOC8 | ||
569 | endif | ||
570 | |||
571 | # Shell quote (do not use $(call) to accommodate ancient setups); | ||
572 | |||
573 | SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) | ||
574 | ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) | ||
575 | |||
576 | DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) | ||
577 | bindir_SQ = $(subst ','\'',$(bindir)) | ||
578 | bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) | ||
579 | mandir_SQ = $(subst ','\'',$(mandir)) | ||
580 | infodir_SQ = $(subst ','\'',$(infodir)) | ||
581 | perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) | ||
582 | template_dir_SQ = $(subst ','\'',$(template_dir)) | ||
583 | htmldir_SQ = $(subst ','\'',$(htmldir)) | ||
584 | prefix_SQ = $(subst ','\'',$(prefix)) | ||
585 | |||
586 | SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) | ||
587 | PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) | ||
588 | |||
589 | LIBS = $(PERFLIBS) $(EXTLIBS) | ||
590 | |||
591 | BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ | ||
592 | $(COMPAT_CFLAGS) | ||
593 | LIB_OBJS += $(COMPAT_OBJS) | ||
594 | |||
595 | ALL_CFLAGS += $(BASIC_CFLAGS) | ||
596 | ALL_LDFLAGS += $(BASIC_LDFLAGS) | ||
597 | |||
598 | export TAR INSTALL DESTDIR SHELL_PATH | ||
599 | |||
600 | |||
601 | ### Build rules | ||
602 | |||
603 | SHELL = $(SHELL_PATH) | ||
604 | |||
605 | all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS | ||
606 | ifneq (,$X) | ||
607 | $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) | ||
608 | endif | ||
609 | |||
610 | all:: | ||
611 | |||
612 | please_set_SHELL_PATH_to_a_more_modern_shell: | ||
613 | @$$(:) | ||
614 | |||
615 | shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell | ||
616 | |||
617 | strip: $(PROGRAMS) perf$X | ||
618 | $(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X | ||
619 | |||
620 | perf.o: perf.c common-cmds.h PERF-CFLAGS | ||
621 | $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ | ||
622 | '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ | ||
623 | $(ALL_CFLAGS) -c $(filter %.c,$^) | ||
624 | |||
625 | perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS) | ||
626 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \ | ||
627 | $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) | ||
628 | |||
629 | builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS | ||
630 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ | ||
631 | '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ | ||
632 | '-DPERF_MAN_PATH="$(mandir_SQ)"' \ | ||
633 | '-DPERF_INFO_PATH="$(infodir_SQ)"' $< | ||
634 | |||
635 | $(BUILT_INS): perf$X | ||
636 | $(QUIET_BUILT_IN)$(RM) $@ && \ | ||
637 | ln perf$X $@ 2>/dev/null || \ | ||
638 | ln -s perf$X $@ 2>/dev/null || \ | ||
639 | cp perf$X $@ | ||
640 | |||
641 | common-cmds.h: util/generate-cmdlist.sh command-list.txt | ||
642 | |||
643 | common-cmds.h: $(wildcard Documentation/perf-*.txt) | ||
644 | $(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@ | ||
645 | |||
646 | $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh | ||
647 | $(QUIET_GEN)$(RM) $@ $@+ && \ | ||
648 | sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ | ||
649 | -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ | ||
650 | -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ | ||
651 | -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | ||
652 | -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ | ||
653 | $@.sh >$@+ && \ | ||
654 | chmod +x $@+ && \ | ||
655 | mv $@+ $@ | ||
656 | |||
657 | configure: configure.ac | ||
658 | $(QUIET_GEN)$(RM) $@ $<+ && \ | ||
659 | sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ | ||
660 | $< > $<+ && \ | ||
661 | autoconf -o $@ $<+ && \ | ||
662 | $(RM) $<+ | ||
663 | |||
664 | # These can record PERF_VERSION | ||
665 | perf.o perf.spec \ | ||
666 | $(patsubst %.sh,%,$(SCRIPT_SH)) \ | ||
667 | $(patsubst %.perl,%,$(SCRIPT_PERL)) \ | ||
668 | : PERF-VERSION-FILE | ||
669 | |||
670 | %.o: %.c PERF-CFLAGS | ||
671 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< | ||
672 | %.s: %.c PERF-CFLAGS | ||
673 | $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< | ||
674 | %.o: %.S | ||
675 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< | ||
676 | |||
677 | util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS | ||
678 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ | ||
679 | '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ | ||
680 | '-DBINDIR="$(bindir_relative_SQ)"' \ | ||
681 | '-DPREFIX="$(prefix_SQ)"' \ | ||
682 | $< | ||
683 | |||
684 | builtin-init-db.o: builtin-init-db.c PERF-CFLAGS | ||
685 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< | ||
686 | |||
687 | util/config.o: util/config.c PERF-CFLAGS | ||
688 | $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< | ||
689 | |||
690 | perf-%$X: %.o $(PERFLIBS) | ||
691 | $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) | ||
692 | |||
693 | $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) | ||
694 | $(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) | ||
695 | builtin-revert.o wt-status.o: wt-status.h | ||
696 | |||
697 | $(LIB_FILE): $(LIB_OBJS) | ||
698 | $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) | ||
699 | |||
700 | doc: | ||
701 | $(MAKE) -C Documentation all | ||
702 | |||
703 | man: | ||
704 | $(MAKE) -C Documentation man | ||
705 | |||
706 | html: | ||
707 | $(MAKE) -C Documentation html | ||
708 | |||
709 | info: | ||
710 | $(MAKE) -C Documentation info | ||
711 | |||
712 | pdf: | ||
713 | $(MAKE) -C Documentation pdf | ||
714 | |||
715 | TAGS: | ||
716 | $(RM) TAGS | ||
717 | $(FIND) . -name '*.[hcS]' -print | xargs etags -a | ||
718 | |||
719 | tags: | ||
720 | $(RM) tags | ||
721 | $(FIND) . -name '*.[hcS]' -print | xargs ctags -a | ||
722 | |||
723 | cscope: | ||
724 | $(RM) cscope* | ||
725 | $(FIND) . -name '*.[hcS]' -print | xargs cscope -b | ||
726 | |||
727 | ### Detect prefix changes | ||
728 | TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ | ||
729 | $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) | ||
730 | |||
731 | PERF-CFLAGS: .FORCE-PERF-CFLAGS | ||
732 | @FLAGS='$(TRACK_CFLAGS)'; \ | ||
733 | if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \ | ||
734 | echo 1>&2 " * new build flags or prefix"; \ | ||
735 | echo "$$FLAGS" >PERF-CFLAGS; \ | ||
736 | fi | ||
737 | |||
738 | # We need to apply sq twice, once to protect from the shell | ||
739 | # that runs PERF-BUILD-OPTIONS, and then again to protect it | ||
740 | # and the first level quoting from the shell that runs "echo". | ||
741 | PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS | ||
742 | @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ | ||
743 | @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ | ||
744 | @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ | ||
745 | @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ | ||
746 | |||
747 | ### Testing rules | ||
748 | |||
749 | # | ||
750 | # None right now: | ||
751 | # | ||
752 | # TEST_PROGRAMS += test-something$X | ||
753 | |||
754 | all:: $(TEST_PROGRAMS) | ||
755 | |||
756 | # GNU make supports exporting all variables by "export" without parameters. | ||
757 | # However, the environment gets quite big, and some programs have problems | ||
758 | # with that. | ||
759 | |||
760 | export NO_SVN_TESTS | ||
761 | |||
762 | check: common-cmds.h # Run sparse over *.c and */*.c; if the bare 'sparse' probe fails, print a hint on stderr and exit 1. | ||
763 | if sparse; \ | ||
764 | then \ | ||
765 | for i in *.c */*.c; \ | ||
766 | do \ | ||
767 | sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ | ||
768 | done; \ | ||
769 | else \ | ||
770 | echo 1>&2 "Did you mean 'make test'?"; \ | ||
771 | exit 1; \ | ||
772 | fi | ||
773 | |||
774 | remove-dashes: | ||
775 | ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS) | ||
776 | |||
777 | ### Installation rules | ||
778 | |||
779 | ifneq ($(filter /%,$(firstword $(template_dir))),) | ||
780 | template_instdir = $(template_dir) | ||
781 | else | ||
782 | template_instdir = $(prefix)/$(template_dir) | ||
783 | endif | ||
784 | export template_instdir | ||
785 | |||
786 | ifneq ($(filter /%,$(firstword $(perfexecdir))),) | ||
787 | perfexec_instdir = $(perfexecdir) | ||
788 | else | ||
789 | perfexec_instdir = $(prefix)/$(perfexecdir) | ||
790 | endif | ||
791 | perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) | ||
792 | export perfexec_instdir | ||
793 | |||
794 | install: all | ||
795 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' | ||
796 | $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' | ||
797 | ifdef BUILT_INS | ||
798 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | ||
799 | $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | ||
800 | ifneq (,$X) | ||
801 | $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) | ||
802 | endif | ||
803 | endif | ||
804 | |||
805 | install-doc: | ||
806 | $(MAKE) -C Documentation install | ||
807 | |||
808 | install-man: | ||
809 | $(MAKE) -C Documentation install-man | ||
810 | |||
811 | install-html: | ||
812 | $(MAKE) -C Documentation install-html | ||
813 | |||
814 | install-info: | ||
815 | $(MAKE) -C Documentation install-info | ||
816 | |||
817 | install-pdf: | ||
818 | $(MAKE) -C Documentation install-pdf | ||
819 | |||
820 | quick-install-doc: | ||
821 | $(MAKE) -C Documentation quick-install | ||
822 | |||
823 | quick-install-man: | ||
824 | $(MAKE) -C Documentation quick-install-man | ||
825 | |||
826 | quick-install-html: | ||
827 | $(MAKE) -C Documentation quick-install-html | ||
828 | |||
829 | |||
830 | ### Maintainer's dist rules | ||
831 | # | ||
832 | # None right now | ||
833 | # | ||
834 | # | ||
835 | # perf.spec: perf.spec.in | ||
836 | # sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+ | ||
837 | # mv $@+ $@ | ||
838 | # | ||
839 | # PERF_TARNAME=perf-$(PERF_VERSION) | ||
840 | # dist: perf.spec perf-archive$(X) configure | ||
841 | # ./perf-archive --format=tar \ | ||
842 | # --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar | ||
843 | # @mkdir -p $(PERF_TARNAME) | ||
844 | # @cp perf.spec configure $(PERF_TARNAME) | ||
845 | # @echo $(PERF_VERSION) > $(PERF_TARNAME)/version | ||
846 | # $(TAR) rf $(PERF_TARNAME).tar \ | ||
847 | # $(PERF_TARNAME)/perf.spec \ | ||
848 | # $(PERF_TARNAME)/configure \ | ||
849 | # $(PERF_TARNAME)/version | ||
850 | # @$(RM) -r $(PERF_TARNAME) | ||
851 | # gzip -f -9 $(PERF_TARNAME).tar | ||
852 | # | ||
853 | # htmldocs = perf-htmldocs-$(PERF_VERSION) | ||
854 | # manpages = perf-manpages-$(PERF_VERSION) | ||
855 | # dist-doc: | ||
856 | # $(RM) -r .doc-tmp-dir | ||
857 | # mkdir .doc-tmp-dir | ||
858 | # $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc | ||
859 | # cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar . | ||
860 | # gzip -n -9 -f $(htmldocs).tar | ||
861 | # : | ||
862 | # $(RM) -r .doc-tmp-dir | ||
863 | # mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7 | ||
864 | # $(MAKE) -C Documentation DESTDIR=./ \ | ||
865 | # man1dir=../.doc-tmp-dir/man1 \ | ||
866 | # man5dir=../.doc-tmp-dir/man5 \ | ||
867 | # man7dir=../.doc-tmp-dir/man7 \ | ||
868 | # install | ||
869 | # cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar . | ||
870 | # gzip -n -9 -f $(manpages).tar | ||
871 | # $(RM) -r .doc-tmp-dir | ||
872 | # | ||
873 | # rpm: dist | ||
874 | # $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz | ||
875 | |||
876 | ### Cleaning rules | ||
877 | |||
878 | distclean: clean | ||
879 | # $(RM) configure | ||
880 | |||
881 | clean: | ||
882 | $(RM) *.o */*.o $(LIB_FILE) | ||
883 | $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X | ||
884 | $(RM) $(TEST_PROGRAMS) | ||
885 | $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* | ||
886 | $(RM) -r autom4te.cache | ||
887 | $(RM) config.log config.mak.autogen config.mak.append config.status config.cache | ||
888 | $(RM) -r $(PERF_TARNAME) .doc-tmp-dir | ||
889 | $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz | ||
890 | $(RM) $(htmldocs).tar.gz $(manpages).tar.gz | ||
891 | $(MAKE) -C Documentation/ clean | ||
892 | $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS | ||
893 | |||
894 | .PHONY: all install clean strip | ||
895 | .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell | ||
896 | .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS | ||
897 | .PHONY: .FORCE-PERF-BUILD-OPTIONS | ||
898 | |||
899 | ### Make sure built-ins do not have dups and listed in perf.c | ||
900 | # | ||
901 | check-builtins:: | ||
902 | ./check-builtins.sh | ||
903 | |||
904 | ### Test suite coverage testing | ||
905 | # | ||
906 | # None right now | ||
907 | # | ||
908 | # .PHONY: coverage coverage-clean coverage-build coverage-report | ||
909 | # | ||
910 | # coverage: | ||
911 | # $(MAKE) coverage-build | ||
912 | # $(MAKE) coverage-report | ||
913 | # | ||
914 | # coverage-clean: | ||
915 | # rm -f *.gcda *.gcno | ||
916 | # | ||
917 | # COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs | ||
918 | # COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov | ||
919 | # | ||
920 | # coverage-build: coverage-clean | ||
921 | # $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all | ||
922 | # $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \ | ||
923 | # -j1 test | ||
924 | # | ||
925 | # coverage-report: | ||
926 | # gcov -b *.c */*.c | ||
927 | # grep '^function.*called 0 ' *.c.gcov */*.c.gcov \ | ||
928 | # | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ | ||
929 | # | tee coverage-untested-functions | ||
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c new file mode 100644 index 000000000000..b1ed5f766cb3 --- /dev/null +++ b/tools/perf/builtin-annotate.c | |||
@@ -0,0 +1,1356 @@ | |||
1 | /* | ||
2 | * builtin-annotate.c | ||
3 | * | ||
4 | * Builtin annotate command: Analyze the perf.data input file, | ||
5 | * look up and read DSOs and symbol information and display | ||
6 | * a histogram of results, along various sorting keys. | ||
7 | */ | ||
8 | #include "builtin.h" | ||
9 | |||
10 | #include "util/util.h" | ||
11 | |||
12 | #include "util/color.h" | ||
13 | #include "util/list.h" | ||
14 | #include "util/cache.h" | ||
15 | #include "util/rbtree.h" | ||
16 | #include "util/symbol.h" | ||
17 | #include "util/string.h" | ||
18 | |||
19 | #include "perf.h" | ||
20 | |||
21 | #include "util/parse-options.h" | ||
22 | #include "util/parse-events.h" | ||
23 | |||
/*
 * Bit flags naming the privilege level a sample was taken in; they are
 * OR-ed together in show_mask and tested in process_overflow_event().
 */
#define SHOW_KERNEL 1
#define SHOW_USER 2
#define SHOW_HV 4

/* Default input file name. NOTE(review): consumed outside this excerpt. */
static char const *input_name = "perf.data";
/*
 * Kernel image name: handed to dso__load_kernel() and used as the objdump
 * target when annotating a kernel symbol.
 */
static char *vmlinux = "vmlinux";

/* Default sort key list. NOTE(review): parsing happens outside this excerpt. */
static char default_sort_order[] = "comm,symbol";
static char *sort_order = default_sort_order;

/* NOTE(review): presumably the file descriptor of the opened input - confirm. */
static int input;
/* Levels whose samples are accounted; by default all three. */
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;

/* When non-zero, dprintf() emits a per-event trace on stdout. */
static int dump_trace = 0;
#define dprintf(x...) do { if (dump_trace) printf(x); } while (0)

/* Verbosity level; higher values unlock more diagnostics (tested up to >= 3). */
static int verbose;

/* NOTE(review): not used in this excerpt; presumably drive the mmap()ed read window. */
static unsigned long page_size;
static unsigned long mmap_window = 32;
44 | |||
/*
 * Event record layouts. Each one starts with the common perf_event_header.
 * NOTE(review): field order presumably mirrors the records stored in the
 * input file, so it must not be rearranged - confirm against the writer.
 */

/* Sample record: instruction pointer plus the pid/tid it was taken in. */
struct ip_event {
	struct perf_event_header header;
	__u64 ip;
	__u32 pid, tid;
};

/* Mapping record: address range, file offset and backing file name. */
struct mmap_event {
	struct perf_event_header header;
	__u32 pid, tid;
	__u64 start;
	__u64 len;
	__u64 pgoff;
	char filename[PATH_MAX];
};

/* Command-name record for a pid. */
struct comm_event {
	struct perf_event_header header;
	__u32 pid, tid;
	char comm[16];
};

/* Fork record: child pid and parent pid. */
struct fork_event {
	struct perf_event_header header;
	__u32 pid, ppid;
};

/* Sample-period record; only printed by process_period_event(). */
struct period_event {
	struct perf_event_header header;
	__u64 time;
	__u64 id;
	__u64 sample_period;
};

/* Any event; 'header' is valid for all of them and selects the real type. */
typedef union event_union {
	struct perf_event_header header;
	struct ip_event ip;
	struct mmap_event mmap;
	struct comm_event comm;
	struct fork_event fork;
	struct period_event period;
} event_t;
86 | |||
87 | static LIST_HEAD(dsos); | ||
88 | static struct dso *kernel_dso; | ||
89 | static struct dso *vdso; | ||
90 | |||
91 | |||
92 | static void dsos__add(struct dso *dso) | ||
93 | { | ||
94 | list_add_tail(&dso->node, &dsos); | ||
95 | } | ||
96 | |||
97 | static struct dso *dsos__find(const char *name) | ||
98 | { | ||
99 | struct dso *pos; | ||
100 | |||
101 | list_for_each_entry(pos, &dsos, node) | ||
102 | if (strcmp(pos->name, name) == 0) | ||
103 | return pos; | ||
104 | return NULL; | ||
105 | } | ||
106 | |||
107 | static struct dso *dsos__findnew(const char *name) | ||
108 | { | ||
109 | struct dso *dso = dsos__find(name); | ||
110 | int nr; | ||
111 | |||
112 | if (dso) | ||
113 | return dso; | ||
114 | |||
115 | dso = dso__new(name, 0); | ||
116 | if (!dso) | ||
117 | goto out_delete_dso; | ||
118 | |||
119 | nr = dso__load(dso, NULL, verbose); | ||
120 | if (nr < 0) { | ||
121 | if (verbose) | ||
122 | fprintf(stderr, "Failed to open: %s\n", name); | ||
123 | goto out_delete_dso; | ||
124 | } | ||
125 | if (!nr && verbose) { | ||
126 | fprintf(stderr, | ||
127 | "No symbols found in: %s, maybe install a debug package?\n", | ||
128 | name); | ||
129 | } | ||
130 | |||
131 | dsos__add(dso); | ||
132 | |||
133 | return dso; | ||
134 | |||
135 | out_delete_dso: | ||
136 | dso__delete(dso); | ||
137 | return NULL; | ||
138 | } | ||
139 | |||
140 | static void dsos__fprintf(FILE *fp) | ||
141 | { | ||
142 | struct dso *pos; | ||
143 | |||
144 | list_for_each_entry(pos, &dsos, node) | ||
145 | dso__fprintf(pos, fp); | ||
146 | } | ||
147 | |||
148 | static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) | ||
149 | { | ||
150 | return dso__find_symbol(kernel_dso, ip); | ||
151 | } | ||
152 | |||
153 | static int load_kernel(void) | ||
154 | { | ||
155 | int err; | ||
156 | |||
157 | kernel_dso = dso__new("[kernel]", 0); | ||
158 | if (!kernel_dso) | ||
159 | return -1; | ||
160 | |||
161 | err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); | ||
162 | if (err) { | ||
163 | dso__delete(kernel_dso); | ||
164 | kernel_dso = NULL; | ||
165 | } else | ||
166 | dsos__add(kernel_dso); | ||
167 | |||
168 | vdso = dso__new("[vdso]", 0); | ||
169 | if (!vdso) | ||
170 | return -1; | ||
171 | |||
172 | vdso->find_symbol = vdso__find_symbol; | ||
173 | |||
174 | dsos__add(vdso); | ||
175 | |||
176 | return err; | ||
177 | } | ||
178 | |||
/* One mapping of a DSO into a thread's address space. */
struct map {
	struct list_head node;	/* link in thread->maps */
	__u64 start;		/* first mapped address */
	__u64 end;		/* start + len as set by map__new(), i.e. one past the last byte */
	__u64 pgoff;		/* offset added back by map__map_ip() */
	__u64 (*map_ip)(struct map *, __u64);	/* translates a sampled ip for symbol lookup */
	struct dso *dso;	/* object backing this range */
};
187 | |||
188 | static __u64 map__map_ip(struct map *map, __u64 ip) | ||
189 | { | ||
190 | return ip - map->start + map->pgoff; | ||
191 | } | ||
192 | |||
/*
 * Address translation used for maps backed by the [vdso] DSO: the ip is
 * returned unchanged and @map is not consulted.
 */
static __u64 vdso__map_ip(struct map *map, __u64 ip)
{
	return ip;
}
197 | |||
198 | static struct map *map__new(struct mmap_event *event) | ||
199 | { | ||
200 | struct map *self = malloc(sizeof(*self)); | ||
201 | |||
202 | if (self != NULL) { | ||
203 | const char *filename = event->filename; | ||
204 | |||
205 | self->start = event->start; | ||
206 | self->end = event->start + event->len; | ||
207 | self->pgoff = event->pgoff; | ||
208 | |||
209 | self->dso = dsos__findnew(filename); | ||
210 | if (self->dso == NULL) | ||
211 | goto out_delete; | ||
212 | |||
213 | if (self->dso == vdso) | ||
214 | self->map_ip = vdso__map_ip; | ||
215 | else | ||
216 | self->map_ip = map__map_ip; | ||
217 | } | ||
218 | return self; | ||
219 | out_delete: | ||
220 | free(self); | ||
221 | return NULL; | ||
222 | } | ||
223 | |||
224 | static struct map *map__clone(struct map *self) | ||
225 | { | ||
226 | struct map *map = malloc(sizeof(*self)); | ||
227 | |||
228 | if (!map) | ||
229 | return NULL; | ||
230 | |||
231 | memcpy(map, self, sizeof(*self)); | ||
232 | |||
233 | return map; | ||
234 | } | ||
235 | |||
236 | static int map__overlap(struct map *l, struct map *r) | ||
237 | { | ||
238 | if (l->start > r->start) { | ||
239 | struct map *t = l; | ||
240 | l = r; | ||
241 | r = t; | ||
242 | } | ||
243 | |||
244 | if (l->end > r->start) | ||
245 | return 1; | ||
246 | |||
247 | return 0; | ||
248 | } | ||
249 | |||
250 | static size_t map__fprintf(struct map *self, FILE *fp) | ||
251 | { | ||
252 | return fprintf(fp, " %Lx-%Lx %Lx %s\n", | ||
253 | self->start, self->end, self->pgoff, self->dso->name); | ||
254 | } | ||
255 | |||
256 | |||
/* Per-pid state reconstructed from the event stream. */
struct thread {
	struct rb_node rb_node;	/* link in the 'threads' tree, ordered by pid */
	struct list_head maps;	/* struct map entries for this pid */
	pid_t pid;
	char *comm;		/* heap string; NULL if its allocation failed */
};
263 | |||
264 | static struct thread *thread__new(pid_t pid) | ||
265 | { | ||
266 | struct thread *self = malloc(sizeof(*self)); | ||
267 | |||
268 | if (self != NULL) { | ||
269 | self->pid = pid; | ||
270 | self->comm = malloc(32); | ||
271 | if (self->comm) | ||
272 | snprintf(self->comm, 32, ":%d", self->pid); | ||
273 | INIT_LIST_HEAD(&self->maps); | ||
274 | } | ||
275 | |||
276 | return self; | ||
277 | } | ||
278 | |||
279 | static int thread__set_comm(struct thread *self, const char *comm) | ||
280 | { | ||
281 | if (self->comm) | ||
282 | free(self->comm); | ||
283 | self->comm = strdup(comm); | ||
284 | return self->comm ? 0 : -ENOMEM; | ||
285 | } | ||
286 | |||
287 | static size_t thread__fprintf(struct thread *self, FILE *fp) | ||
288 | { | ||
289 | struct map *pos; | ||
290 | size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); | ||
291 | |||
292 | list_for_each_entry(pos, &self->maps, node) | ||
293 | ret += map__fprintf(pos, fp); | ||
294 | |||
295 | return ret; | ||
296 | } | ||
297 | |||
298 | |||
299 | static struct rb_root threads; | ||
300 | static struct thread *last_match; | ||
301 | |||
302 | static struct thread *threads__findnew(pid_t pid) | ||
303 | { | ||
304 | struct rb_node **p = &threads.rb_node; | ||
305 | struct rb_node *parent = NULL; | ||
306 | struct thread *th; | ||
307 | |||
308 | /* | ||
309 | * Font-end cache - PID lookups come in blocks, | ||
310 | * so most of the time we dont have to look up | ||
311 | * the full rbtree: | ||
312 | */ | ||
313 | if (last_match && last_match->pid == pid) | ||
314 | return last_match; | ||
315 | |||
316 | while (*p != NULL) { | ||
317 | parent = *p; | ||
318 | th = rb_entry(parent, struct thread, rb_node); | ||
319 | |||
320 | if (th->pid == pid) { | ||
321 | last_match = th; | ||
322 | return th; | ||
323 | } | ||
324 | |||
325 | if (pid < th->pid) | ||
326 | p = &(*p)->rb_left; | ||
327 | else | ||
328 | p = &(*p)->rb_right; | ||
329 | } | ||
330 | |||
331 | th = thread__new(pid); | ||
332 | if (th != NULL) { | ||
333 | rb_link_node(&th->rb_node, parent, p); | ||
334 | rb_insert_color(&th->rb_node, &threads); | ||
335 | last_match = th; | ||
336 | } | ||
337 | |||
338 | return th; | ||
339 | } | ||
340 | |||
341 | static void thread__insert_map(struct thread *self, struct map *map) | ||
342 | { | ||
343 | struct map *pos, *tmp; | ||
344 | |||
345 | list_for_each_entry_safe(pos, tmp, &self->maps, node) { | ||
346 | if (map__overlap(pos, map)) { | ||
347 | list_del_init(&pos->node); | ||
348 | /* XXX leaks dsos */ | ||
349 | free(pos); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | list_add_tail(&map->node, &self->maps); | ||
354 | } | ||
355 | |||
356 | static int thread__fork(struct thread *self, struct thread *parent) | ||
357 | { | ||
358 | struct map *map; | ||
359 | |||
360 | if (self->comm) | ||
361 | free(self->comm); | ||
362 | self->comm = strdup(parent->comm); | ||
363 | if (!self->comm) | ||
364 | return -ENOMEM; | ||
365 | |||
366 | list_for_each_entry(map, &parent->maps, node) { | ||
367 | struct map *new = map__clone(map); | ||
368 | if (!new) | ||
369 | return -ENOMEM; | ||
370 | thread__insert_map(self, new); | ||
371 | } | ||
372 | |||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | static struct map *thread__find_map(struct thread *self, __u64 ip) | ||
377 | { | ||
378 | struct map *pos; | ||
379 | |||
380 | if (self == NULL) | ||
381 | return NULL; | ||
382 | |||
383 | list_for_each_entry(pos, &self->maps, node) | ||
384 | if (ip >= pos->start && ip <= pos->end) | ||
385 | return pos; | ||
386 | |||
387 | return NULL; | ||
388 | } | ||
389 | |||
390 | static size_t threads__fprintf(FILE *fp) | ||
391 | { | ||
392 | size_t ret = 0; | ||
393 | struct rb_node *nd; | ||
394 | |||
395 | for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { | ||
396 | struct thread *pos = rb_entry(nd, struct thread, rb_node); | ||
397 | |||
398 | ret += thread__fprintf(pos, fp); | ||
399 | } | ||
400 | |||
401 | return ret; | ||
402 | } | ||
403 | |||
404 | /* | ||
405 | * histogram, sorted on item, collects counts | ||
406 | */ | ||
407 | |||
/* Primary histogram tree, ordered by hist_entry__cmp(). */
static struct rb_root hist;

/* One histogram bucket: the sample attributes it stands for plus a hit count. */
struct hist_entry {
	struct rb_node rb_node;	/* link in hist, collapse_hists or output_hists */

	struct thread *thread;
	struct map *map;	/* NULL when the sample fell outside all known maps */
	struct dso *dso;	/* NULL when no DSO could be determined */
	struct symbol *sym;	/* NULL when the ip did not resolve */
	__u64 ip;
	char level;		/* 'k', '.' or 'H' as set by process_overflow_event() */

	uint32_t count;		/* number of samples accounted to this bucket */
};
422 | |||
423 | /* | ||
424 | * configurable sorting bits | ||
425 | */ | ||
426 | |||
/* One sort key: how to order, optionally merge, and print entries. */
struct sort_entry {
	struct list_head list;	/* link in hist_entry__sort_list */

	char *header;		/* NOTE(review): presumably the column title - printed outside this excerpt */

	/* ordering used while building the primary histogram */
	int64_t (*cmp)(struct hist_entry *, struct hist_entry *);
	/* optional ordering for the collapse pass; cmp is used when NULL */
	int64_t (*collapse)(struct hist_entry *, struct hist_entry *);
	/* prints this key's column for one entry */
	size_t (*print)(FILE *fp, struct hist_entry *);
};
436 | |||
437 | /* --sort pid */ | ||
438 | |||
439 | static int64_t | ||
440 | sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) | ||
441 | { | ||
442 | return right->thread->pid - left->thread->pid; | ||
443 | } | ||
444 | |||
445 | static size_t | ||
446 | sort__thread_print(FILE *fp, struct hist_entry *self) | ||
447 | { | ||
448 | return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); | ||
449 | } | ||
450 | |||
451 | static struct sort_entry sort_thread = { | ||
452 | .header = " Command: Pid", | ||
453 | .cmp = sort__thread_cmp, | ||
454 | .print = sort__thread_print, | ||
455 | }; | ||
456 | |||
457 | /* --sort comm */ | ||
458 | |||
459 | static int64_t | ||
460 | sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) | ||
461 | { | ||
462 | return right->thread->pid - left->thread->pid; | ||
463 | } | ||
464 | |||
465 | static int64_t | ||
466 | sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) | ||
467 | { | ||
468 | char *comm_l = left->thread->comm; | ||
469 | char *comm_r = right->thread->comm; | ||
470 | |||
471 | if (!comm_l || !comm_r) { | ||
472 | if (!comm_l && !comm_r) | ||
473 | return 0; | ||
474 | else if (!comm_l) | ||
475 | return -1; | ||
476 | else | ||
477 | return 1; | ||
478 | } | ||
479 | |||
480 | return strcmp(comm_l, comm_r); | ||
481 | } | ||
482 | |||
483 | static size_t | ||
484 | sort__comm_print(FILE *fp, struct hist_entry *self) | ||
485 | { | ||
486 | return fprintf(fp, "%16s", self->thread->comm); | ||
487 | } | ||
488 | |||
489 | static struct sort_entry sort_comm = { | ||
490 | .header = " Command", | ||
491 | .cmp = sort__comm_cmp, | ||
492 | .collapse = sort__comm_collapse, | ||
493 | .print = sort__comm_print, | ||
494 | }; | ||
495 | |||
496 | /* --sort dso */ | ||
497 | |||
498 | static int64_t | ||
499 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | ||
500 | { | ||
501 | struct dso *dso_l = left->dso; | ||
502 | struct dso *dso_r = right->dso; | ||
503 | |||
504 | if (!dso_l || !dso_r) { | ||
505 | if (!dso_l && !dso_r) | ||
506 | return 0; | ||
507 | else if (!dso_l) | ||
508 | return -1; | ||
509 | else | ||
510 | return 1; | ||
511 | } | ||
512 | |||
513 | return strcmp(dso_l->name, dso_r->name); | ||
514 | } | ||
515 | |||
516 | static size_t | ||
517 | sort__dso_print(FILE *fp, struct hist_entry *self) | ||
518 | { | ||
519 | if (self->dso) | ||
520 | return fprintf(fp, "%-25s", self->dso->name); | ||
521 | |||
522 | return fprintf(fp, "%016llx ", (__u64)self->ip); | ||
523 | } | ||
524 | |||
525 | static struct sort_entry sort_dso = { | ||
526 | .header = "Shared Object ", | ||
527 | .cmp = sort__dso_cmp, | ||
528 | .print = sort__dso_print, | ||
529 | }; | ||
530 | |||
531 | /* --sort symbol */ | ||
532 | |||
533 | static int64_t | ||
534 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | ||
535 | { | ||
536 | __u64 ip_l, ip_r; | ||
537 | |||
538 | if (left->sym == right->sym) | ||
539 | return 0; | ||
540 | |||
541 | ip_l = left->sym ? left->sym->start : left->ip; | ||
542 | ip_r = right->sym ? right->sym->start : right->ip; | ||
543 | |||
544 | return (int64_t)(ip_r - ip_l); | ||
545 | } | ||
546 | |||
547 | static size_t | ||
548 | sort__sym_print(FILE *fp, struct hist_entry *self) | ||
549 | { | ||
550 | size_t ret = 0; | ||
551 | |||
552 | if (verbose) | ||
553 | ret += fprintf(fp, "%#018llx ", (__u64)self->ip); | ||
554 | |||
555 | if (self->sym) { | ||
556 | ret += fprintf(fp, "[%c] %s", | ||
557 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); | ||
558 | } else { | ||
559 | ret += fprintf(fp, "%#016llx", (__u64)self->ip); | ||
560 | } | ||
561 | |||
562 | return ret; | ||
563 | } | ||
564 | |||
565 | static struct sort_entry sort_sym = { | ||
566 | .header = "Symbol", | ||
567 | .cmp = sort__sym_cmp, | ||
568 | .print = sort__sym_print, | ||
569 | }; | ||
570 | |||
/* Set once any selected sort key provides a collapse callback. */
static int sort__need_collapse = 0;

/* Binds a user-visible key name to its sort_entry. */
struct sort_dimension {
	char *name;
	struct sort_entry *entry;
	int taken;	/* non-zero once the key has been added to the sort list */
};

/* Every key sort_dimension__add() can select, in match order. */
static struct sort_dimension sort_dimensions[] = {
	{ .name = "pid", .entry = &sort_thread, },
	{ .name = "comm", .entry = &sort_comm, },
	{ .name = "dso", .entry = &sort_dso, },
	{ .name = "symbol", .entry = &sort_sym, },
};
585 | |||
586 | static LIST_HEAD(hist_entry__sort_list); | ||
587 | |||
588 | static int sort_dimension__add(char *tok) | ||
589 | { | ||
590 | int i; | ||
591 | |||
592 | for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { | ||
593 | struct sort_dimension *sd = &sort_dimensions[i]; | ||
594 | |||
595 | if (sd->taken) | ||
596 | continue; | ||
597 | |||
598 | if (strncasecmp(tok, sd->name, strlen(tok))) | ||
599 | continue; | ||
600 | |||
601 | if (sd->entry->collapse) | ||
602 | sort__need_collapse = 1; | ||
603 | |||
604 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | ||
605 | sd->taken = 1; | ||
606 | |||
607 | return 0; | ||
608 | } | ||
609 | |||
610 | return -ESRCH; | ||
611 | } | ||
612 | |||
613 | static int64_t | ||
614 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | ||
615 | { | ||
616 | struct sort_entry *se; | ||
617 | int64_t cmp = 0; | ||
618 | |||
619 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
620 | cmp = se->cmp(left, right); | ||
621 | if (cmp) | ||
622 | break; | ||
623 | } | ||
624 | |||
625 | return cmp; | ||
626 | } | ||
627 | |||
628 | static int64_t | ||
629 | hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | ||
630 | { | ||
631 | struct sort_entry *se; | ||
632 | int64_t cmp = 0; | ||
633 | |||
634 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
635 | int64_t (*f)(struct hist_entry *, struct hist_entry *); | ||
636 | |||
637 | f = se->collapse ?: se->cmp; | ||
638 | |||
639 | cmp = f(left, right); | ||
640 | if (cmp) | ||
641 | break; | ||
642 | } | ||
643 | |||
644 | return cmp; | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * collect histogram counts | ||
649 | */ | ||
650 | static void hist_hit(struct hist_entry *he, __u64 ip) | ||
651 | { | ||
652 | unsigned int sym_size, offset; | ||
653 | struct symbol *sym = he->sym; | ||
654 | |||
655 | he->count++; | ||
656 | |||
657 | if (!sym || !sym->hist) | ||
658 | return; | ||
659 | |||
660 | sym_size = sym->end - sym->start; | ||
661 | offset = ip - sym->start; | ||
662 | |||
663 | if (offset >= sym_size) | ||
664 | return; | ||
665 | |||
666 | sym->hist_sum++; | ||
667 | sym->hist[offset]++; | ||
668 | |||
669 | if (verbose >= 3) | ||
670 | printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", | ||
671 | (void *)(unsigned long)he->sym->start, | ||
672 | he->sym->name, | ||
673 | (void *)(unsigned long)ip, ip - he->sym->start, | ||
674 | sym->hist[offset]); | ||
675 | } | ||
676 | |||
677 | static int | ||
678 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | ||
679 | struct symbol *sym, __u64 ip, char level) | ||
680 | { | ||
681 | struct rb_node **p = &hist.rb_node; | ||
682 | struct rb_node *parent = NULL; | ||
683 | struct hist_entry *he; | ||
684 | struct hist_entry entry = { | ||
685 | .thread = thread, | ||
686 | .map = map, | ||
687 | .dso = dso, | ||
688 | .sym = sym, | ||
689 | .ip = ip, | ||
690 | .level = level, | ||
691 | .count = 1, | ||
692 | }; | ||
693 | int cmp; | ||
694 | |||
695 | while (*p != NULL) { | ||
696 | parent = *p; | ||
697 | he = rb_entry(parent, struct hist_entry, rb_node); | ||
698 | |||
699 | cmp = hist_entry__cmp(&entry, he); | ||
700 | |||
701 | if (!cmp) { | ||
702 | hist_hit(he, ip); | ||
703 | |||
704 | return 0; | ||
705 | } | ||
706 | |||
707 | if (cmp < 0) | ||
708 | p = &(*p)->rb_left; | ||
709 | else | ||
710 | p = &(*p)->rb_right; | ||
711 | } | ||
712 | |||
713 | he = malloc(sizeof(*he)); | ||
714 | if (!he) | ||
715 | return -ENOMEM; | ||
716 | *he = entry; | ||
717 | rb_link_node(&he->rb_node, parent, p); | ||
718 | rb_insert_color(&he->rb_node, &hist); | ||
719 | |||
720 | return 0; | ||
721 | } | ||
722 | |||
/*
 * Release an entry allocated by hist_entry__add(). Only the entry itself
 * is freed; the thread/map/dso/sym it points to are left untouched.
 */
static void hist_entry__free(struct hist_entry *he)
{
	free(he);
}
727 | |||
728 | /* | ||
729 | * collapse the histogram | ||
730 | */ | ||
731 | |||
732 | static struct rb_root collapse_hists; | ||
733 | |||
734 | static void collapse__insert_entry(struct hist_entry *he) | ||
735 | { | ||
736 | struct rb_node **p = &collapse_hists.rb_node; | ||
737 | struct rb_node *parent = NULL; | ||
738 | struct hist_entry *iter; | ||
739 | int64_t cmp; | ||
740 | |||
741 | while (*p != NULL) { | ||
742 | parent = *p; | ||
743 | iter = rb_entry(parent, struct hist_entry, rb_node); | ||
744 | |||
745 | cmp = hist_entry__collapse(iter, he); | ||
746 | |||
747 | if (!cmp) { | ||
748 | iter->count += he->count; | ||
749 | hist_entry__free(he); | ||
750 | return; | ||
751 | } | ||
752 | |||
753 | if (cmp < 0) | ||
754 | p = &(*p)->rb_left; | ||
755 | else | ||
756 | p = &(*p)->rb_right; | ||
757 | } | ||
758 | |||
759 | rb_link_node(&he->rb_node, parent, p); | ||
760 | rb_insert_color(&he->rb_node, &collapse_hists); | ||
761 | } | ||
762 | |||
763 | static void collapse__resort(void) | ||
764 | { | ||
765 | struct rb_node *next; | ||
766 | struct hist_entry *n; | ||
767 | |||
768 | if (!sort__need_collapse) | ||
769 | return; | ||
770 | |||
771 | next = rb_first(&hist); | ||
772 | while (next) { | ||
773 | n = rb_entry(next, struct hist_entry, rb_node); | ||
774 | next = rb_next(&n->rb_node); | ||
775 | |||
776 | rb_erase(&n->rb_node, &hist); | ||
777 | collapse__insert_entry(n); | ||
778 | } | ||
779 | } | ||
780 | |||
781 | /* | ||
782 | * reverse the map, sort on count. | ||
783 | */ | ||
784 | |||
785 | static struct rb_root output_hists; | ||
786 | |||
787 | static void output__insert_entry(struct hist_entry *he) | ||
788 | { | ||
789 | struct rb_node **p = &output_hists.rb_node; | ||
790 | struct rb_node *parent = NULL; | ||
791 | struct hist_entry *iter; | ||
792 | |||
793 | while (*p != NULL) { | ||
794 | parent = *p; | ||
795 | iter = rb_entry(parent, struct hist_entry, rb_node); | ||
796 | |||
797 | if (he->count > iter->count) | ||
798 | p = &(*p)->rb_left; | ||
799 | else | ||
800 | p = &(*p)->rb_right; | ||
801 | } | ||
802 | |||
803 | rb_link_node(&he->rb_node, parent, p); | ||
804 | rb_insert_color(&he->rb_node, &output_hists); | ||
805 | } | ||
806 | |||
807 | static void output__resort(void) | ||
808 | { | ||
809 | struct rb_node *next; | ||
810 | struct hist_entry *n; | ||
811 | struct rb_root *tree = &hist; | ||
812 | |||
813 | if (sort__need_collapse) | ||
814 | tree = &collapse_hists; | ||
815 | |||
816 | next = rb_first(tree); | ||
817 | |||
818 | while (next) { | ||
819 | n = rb_entry(next, struct hist_entry, rb_node); | ||
820 | next = rb_next(&n->rb_node); | ||
821 | |||
822 | rb_erase(&n->rb_node, tree); | ||
823 | output__insert_entry(n); | ||
824 | } | ||
825 | } | ||
826 | |||
/*
 * Make sure pid 0 exists and is named "[idle]".
 * Prints a message and terminates the process when that is not possible.
 */
static void register_idle_thread(void)
{
	struct thread *idle = threads__findnew(0);

	if (idle != NULL && thread__set_comm(idle, "[idle]") == 0)
		return;

	fprintf(stderr, "problem inserting idle task.\n");
	exit(-1);
}
837 | |||
/*
 * Event counters: 'total' counts processed samples, the others count
 * successfully processed mmap/comm/fork events.
 * NOTE(review): total_unknown is not updated in this excerpt - presumably
 * maintained by the read loop.
 */
static unsigned long total = 0,
		     total_mmap = 0,
		     total_comm = 0,
		     total_fork = 0,
		     total_unknown = 0;
843 | |||
844 | static int | ||
845 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | ||
846 | { | ||
847 | char level; | ||
848 | int show = 0; | ||
849 | struct dso *dso = NULL; | ||
850 | struct thread *thread = threads__findnew(event->ip.pid); | ||
851 | __u64 ip = event->ip.ip; | ||
852 | struct map *map = NULL; | ||
853 | |||
854 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", | ||
855 | (void *)(offset + head), | ||
856 | (void *)(long)(event->header.size), | ||
857 | event->header.misc, | ||
858 | event->ip.pid, | ||
859 | (void *)(long)ip); | ||
860 | |||
861 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); | ||
862 | |||
863 | if (thread == NULL) { | ||
864 | fprintf(stderr, "problem processing %d event, skipping it.\n", | ||
865 | event->header.type); | ||
866 | return -1; | ||
867 | } | ||
868 | |||
869 | if (event->header.misc & PERF_EVENT_MISC_KERNEL) { | ||
870 | show = SHOW_KERNEL; | ||
871 | level = 'k'; | ||
872 | |||
873 | dso = kernel_dso; | ||
874 | |||
875 | dprintf(" ...... dso: %s\n", dso->name); | ||
876 | |||
877 | } else if (event->header.misc & PERF_EVENT_MISC_USER) { | ||
878 | |||
879 | show = SHOW_USER; | ||
880 | level = '.'; | ||
881 | |||
882 | map = thread__find_map(thread, ip); | ||
883 | if (map != NULL) { | ||
884 | ip = map->map_ip(map, ip); | ||
885 | dso = map->dso; | ||
886 | } else { | ||
887 | /* | ||
888 | * If this is outside of all known maps, | ||
889 | * and is a negative address, try to look it | ||
890 | * up in the kernel dso, as it might be a | ||
891 | * vsyscall (which executes in user-mode): | ||
892 | */ | ||
893 | if ((long long)ip < 0) | ||
894 | dso = kernel_dso; | ||
895 | } | ||
896 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); | ||
897 | |||
898 | } else { | ||
899 | show = SHOW_HV; | ||
900 | level = 'H'; | ||
901 | dprintf(" ...... dso: [hypervisor]\n"); | ||
902 | } | ||
903 | |||
904 | if (show & show_mask) { | ||
905 | struct symbol *sym = NULL; | ||
906 | |||
907 | if (dso) | ||
908 | sym = dso->find_symbol(dso, ip); | ||
909 | |||
910 | if (hist_entry__add(thread, map, dso, sym, ip, level)) { | ||
911 | fprintf(stderr, | ||
912 | "problem incrementing symbol count, skipping event\n"); | ||
913 | return -1; | ||
914 | } | ||
915 | } | ||
916 | total++; | ||
917 | |||
918 | return 0; | ||
919 | } | ||
920 | |||
921 | static int | ||
922 | process_mmap_event(event_t *event, unsigned long offset, unsigned long head) | ||
923 | { | ||
924 | struct thread *thread = threads__findnew(event->mmap.pid); | ||
925 | struct map *map = map__new(&event->mmap); | ||
926 | |||
927 | dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", | ||
928 | (void *)(offset + head), | ||
929 | (void *)(long)(event->header.size), | ||
930 | event->mmap.pid, | ||
931 | (void *)(long)event->mmap.start, | ||
932 | (void *)(long)event->mmap.len, | ||
933 | (void *)(long)event->mmap.pgoff, | ||
934 | event->mmap.filename); | ||
935 | |||
936 | if (thread == NULL || map == NULL) { | ||
937 | dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); | ||
938 | return 0; | ||
939 | } | ||
940 | |||
941 | thread__insert_map(thread, map); | ||
942 | total_mmap++; | ||
943 | |||
944 | return 0; | ||
945 | } | ||
946 | |||
947 | static int | ||
948 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) | ||
949 | { | ||
950 | struct thread *thread = threads__findnew(event->comm.pid); | ||
951 | |||
952 | dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", | ||
953 | (void *)(offset + head), | ||
954 | (void *)(long)(event->header.size), | ||
955 | event->comm.comm, event->comm.pid); | ||
956 | |||
957 | if (thread == NULL || | ||
958 | thread__set_comm(thread, event->comm.comm)) { | ||
959 | dprintf("problem processing PERF_EVENT_COMM, skipping event.\n"); | ||
960 | return -1; | ||
961 | } | ||
962 | total_comm++; | ||
963 | |||
964 | return 0; | ||
965 | } | ||
966 | |||
967 | static int | ||
968 | process_fork_event(event_t *event, unsigned long offset, unsigned long head) | ||
969 | { | ||
970 | struct thread *thread = threads__findnew(event->fork.pid); | ||
971 | struct thread *parent = threads__findnew(event->fork.ppid); | ||
972 | |||
973 | dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", | ||
974 | (void *)(offset + head), | ||
975 | (void *)(long)(event->header.size), | ||
976 | event->fork.pid, event->fork.ppid); | ||
977 | |||
978 | if (!thread || !parent || thread__fork(thread, parent)) { | ||
979 | dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); | ||
980 | return -1; | ||
981 | } | ||
982 | total_fork++; | ||
983 | |||
984 | return 0; | ||
985 | } | ||
986 | |||
987 | static int | ||
988 | process_period_event(event_t *event, unsigned long offset, unsigned long head) | ||
989 | { | ||
990 | dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", | ||
991 | (void *)(offset + head), | ||
992 | (void *)(long)(event->header.size), | ||
993 | event->period.time, | ||
994 | event->period.id, | ||
995 | event->period.sample_period); | ||
996 | |||
997 | return 0; | ||
998 | } | ||
999 | |||
1000 | static int | ||
1001 | process_event(event_t *event, unsigned long offset, unsigned long head) | ||
1002 | { | ||
1003 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | ||
1004 | return process_overflow_event(event, offset, head); | ||
1005 | |||
1006 | switch (event->header.type) { | ||
1007 | case PERF_EVENT_MMAP: | ||
1008 | return process_mmap_event(event, offset, head); | ||
1009 | |||
1010 | case PERF_EVENT_COMM: | ||
1011 | return process_comm_event(event, offset, head); | ||
1012 | |||
1013 | case PERF_EVENT_FORK: | ||
1014 | return process_fork_event(event, offset, head); | ||
1015 | |||
1016 | case PERF_EVENT_PERIOD: | ||
1017 | return process_period_event(event, offset, head); | ||
1018 | /* | ||
1019 | * We dont process them right now but they are fine: | ||
1020 | */ | ||
1021 | |||
1022 | case PERF_EVENT_THROTTLE: | ||
1023 | case PERF_EVENT_UNTHROTTLE: | ||
1024 | return 0; | ||
1025 | |||
1026 | default: | ||
1027 | return -1; | ||
1028 | } | ||
1029 | |||
1030 | return 0; | ||
1031 | } | ||
1032 | |||
1033 | static int | ||
1034 | parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) | ||
1035 | { | ||
1036 | char *line = NULL, *tmp, *tmp2; | ||
1037 | unsigned int offset; | ||
1038 | size_t line_len; | ||
1039 | __u64 line_ip; | ||
1040 | int ret; | ||
1041 | char *c; | ||
1042 | |||
1043 | if (getline(&line, &line_len, file) < 0) | ||
1044 | return -1; | ||
1045 | if (!line) | ||
1046 | return -1; | ||
1047 | |||
1048 | c = strchr(line, '\n'); | ||
1049 | if (c) | ||
1050 | *c = 0; | ||
1051 | |||
1052 | line_ip = -1; | ||
1053 | offset = 0; | ||
1054 | ret = -2; | ||
1055 | |||
1056 | /* | ||
1057 | * Strip leading spaces: | ||
1058 | */ | ||
1059 | tmp = line; | ||
1060 | while (*tmp) { | ||
1061 | if (*tmp != ' ') | ||
1062 | break; | ||
1063 | tmp++; | ||
1064 | } | ||
1065 | |||
1066 | if (*tmp) { | ||
1067 | /* | ||
1068 | * Parse hexa addresses followed by ':' | ||
1069 | */ | ||
1070 | line_ip = strtoull(tmp, &tmp2, 16); | ||
1071 | if (*tmp2 != ':') | ||
1072 | line_ip = -1; | ||
1073 | } | ||
1074 | |||
1075 | if (line_ip != -1) { | ||
1076 | unsigned int hits = 0; | ||
1077 | double percent = 0.0; | ||
1078 | char *color = PERF_COLOR_NORMAL; | ||
1079 | |||
1080 | offset = line_ip - start; | ||
1081 | if (offset < len) | ||
1082 | hits = sym->hist[offset]; | ||
1083 | |||
1084 | if (sym->hist_sum) | ||
1085 | percent = 100.0 * hits / sym->hist_sum; | ||
1086 | |||
1087 | /* | ||
1088 | * We color high-overhead entries in red, mid-overhead | ||
1089 | * entries in green - and keep the low overhead places | ||
1090 | * normal: | ||
1091 | */ | ||
1092 | if (percent >= 5.0) | ||
1093 | color = PERF_COLOR_RED; | ||
1094 | else { | ||
1095 | if (percent > 0.5) | ||
1096 | color = PERF_COLOR_GREEN; | ||
1097 | } | ||
1098 | |||
1099 | color_fprintf(stdout, color, " %7.2f", percent); | ||
1100 | printf(" : "); | ||
1101 | color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line); | ||
1102 | } else { | ||
1103 | if (!*line) | ||
1104 | printf(" :\n"); | ||
1105 | else | ||
1106 | printf(" : %s\n", line); | ||
1107 | } | ||
1108 | |||
1109 | return 0; | ||
1110 | } | ||
1111 | |||
1112 | static void annotate_sym(struct dso *dso, struct symbol *sym) | ||
1113 | { | ||
1114 | char *filename = dso->name; | ||
1115 | __u64 start, end, len; | ||
1116 | char command[PATH_MAX*2]; | ||
1117 | FILE *file; | ||
1118 | |||
1119 | if (!filename) | ||
1120 | return; | ||
1121 | if (dso == kernel_dso) | ||
1122 | filename = vmlinux; | ||
1123 | |||
1124 | printf("\n------------------------------------------------\n"); | ||
1125 | printf(" Percent | Source code & Disassembly of %s\n", filename); | ||
1126 | printf("------------------------------------------------\n"); | ||
1127 | |||
1128 | if (verbose >= 2) | ||
1129 | printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); | ||
1130 | |||
1131 | start = sym->obj_start; | ||
1132 | if (!start) | ||
1133 | start = sym->start; | ||
1134 | |||
1135 | end = start + sym->end - sym->start + 1; | ||
1136 | len = sym->end - sym->start; | ||
1137 | |||
1138 | sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); | ||
1139 | |||
1140 | if (verbose >= 3) | ||
1141 | printf("doing: %s\n", command); | ||
1142 | |||
1143 | file = popen(command, "r"); | ||
1144 | if (!file) | ||
1145 | return; | ||
1146 | |||
1147 | while (!feof(file)) { | ||
1148 | if (parse_line(file, sym, start, len) < 0) | ||
1149 | break; | ||
1150 | } | ||
1151 | |||
1152 | pclose(file); | ||
1153 | } | ||
1154 | |||
1155 | static void find_annotations(void) | ||
1156 | { | ||
1157 | struct rb_node *nd; | ||
1158 | struct dso *dso; | ||
1159 | int count = 0; | ||
1160 | |||
1161 | list_for_each_entry(dso, &dsos, node) { | ||
1162 | |||
1163 | for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) { | ||
1164 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | ||
1165 | |||
1166 | if (sym->hist) { | ||
1167 | annotate_sym(dso, sym); | ||
1168 | count++; | ||
1169 | } | ||
1170 | } | ||
1171 | } | ||
1172 | |||
1173 | if (!count) | ||
1174 | printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter); | ||
1175 | } | ||
1176 | |||
1177 | static int __cmd_annotate(void) | ||
1178 | { | ||
1179 | int ret, rc = EXIT_FAILURE; | ||
1180 | unsigned long offset = 0; | ||
1181 | unsigned long head = 0; | ||
1182 | struct stat stat; | ||
1183 | event_t *event; | ||
1184 | uint32_t size; | ||
1185 | char *buf; | ||
1186 | |||
1187 | register_idle_thread(); | ||
1188 | |||
1189 | input = open(input_name, O_RDONLY); | ||
1190 | if (input < 0) { | ||
1191 | perror("failed to open file"); | ||
1192 | exit(-1); | ||
1193 | } | ||
1194 | |||
1195 | ret = fstat(input, &stat); | ||
1196 | if (ret < 0) { | ||
1197 | perror("failed to stat file"); | ||
1198 | exit(-1); | ||
1199 | } | ||
1200 | |||
1201 | if (!stat.st_size) { | ||
1202 | fprintf(stderr, "zero-sized file, nothing to do!\n"); | ||
1203 | exit(0); | ||
1204 | } | ||
1205 | |||
1206 | if (load_kernel() < 0) { | ||
1207 | perror("failed to load kernel symbols"); | ||
1208 | return EXIT_FAILURE; | ||
1209 | } | ||
1210 | |||
1211 | remap: | ||
1212 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, | ||
1213 | MAP_SHARED, input, offset); | ||
1214 | if (buf == MAP_FAILED) { | ||
1215 | perror("failed to mmap file"); | ||
1216 | exit(-1); | ||
1217 | } | ||
1218 | |||
1219 | more: | ||
1220 | event = (event_t *)(buf + head); | ||
1221 | |||
1222 | size = event->header.size; | ||
1223 | if (!size) | ||
1224 | size = 8; | ||
1225 | |||
1226 | if (head + event->header.size >= page_size * mmap_window) { | ||
1227 | unsigned long shift = page_size * (head / page_size); | ||
1228 | int ret; | ||
1229 | |||
1230 | ret = munmap(buf, page_size * mmap_window); | ||
1231 | assert(ret == 0); | ||
1232 | |||
1233 | offset += shift; | ||
1234 | head -= shift; | ||
1235 | goto remap; | ||
1236 | } | ||
1237 | |||
1238 | size = event->header.size; | ||
1239 | |||
1240 | dprintf("%p [%p]: event: %d\n", | ||
1241 | (void *)(offset + head), | ||
1242 | (void *)(long)event->header.size, | ||
1243 | event->header.type); | ||
1244 | |||
1245 | if (!size || process_event(event, offset, head) < 0) { | ||
1246 | |||
1247 | dprintf("%p [%p]: skipping unknown header type: %d\n", | ||
1248 | (void *)(offset + head), | ||
1249 | (void *)(long)(event->header.size), | ||
1250 | event->header.type); | ||
1251 | |||
1252 | total_unknown++; | ||
1253 | |||
1254 | /* | ||
1255 | * assume we lost track of the stream, check alignment, and | ||
1256 | * increment a single u64 in the hope to catch on again 'soon'. | ||
1257 | */ | ||
1258 | |||
1259 | if (unlikely(head & 7)) | ||
1260 | head &= ~7ULL; | ||
1261 | |||
1262 | size = 8; | ||
1263 | } | ||
1264 | |||
1265 | head += size; | ||
1266 | |||
1267 | if (offset + head < stat.st_size) | ||
1268 | goto more; | ||
1269 | |||
1270 | rc = EXIT_SUCCESS; | ||
1271 | close(input); | ||
1272 | |||
1273 | dprintf(" IP events: %10ld\n", total); | ||
1274 | dprintf(" mmap events: %10ld\n", total_mmap); | ||
1275 | dprintf(" comm events: %10ld\n", total_comm); | ||
1276 | dprintf(" fork events: %10ld\n", total_fork); | ||
1277 | dprintf(" unknown events: %10ld\n", total_unknown); | ||
1278 | |||
1279 | if (dump_trace) | ||
1280 | return 0; | ||
1281 | |||
1282 | if (verbose >= 3) | ||
1283 | threads__fprintf(stdout); | ||
1284 | |||
1285 | if (verbose >= 2) | ||
1286 | dsos__fprintf(stdout); | ||
1287 | |||
1288 | collapse__resort(); | ||
1289 | output__resort(); | ||
1290 | |||
1291 | find_annotations(); | ||
1292 | |||
1293 | return rc; | ||
1294 | } | ||
1295 | |||
1296 | static const char * const annotate_usage[] = { | ||
1297 | "perf annotate [<options>] <command>", | ||
1298 | NULL | ||
1299 | }; | ||
1300 | |||
1301 | static const struct option options[] = { | ||
1302 | OPT_STRING('i', "input", &input_name, "file", | ||
1303 | "input file name"), | ||
1304 | OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", | ||
1305 | "symbol to annotate"), | ||
1306 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
1307 | "be more verbose (show symbol address, etc)"), | ||
1308 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1309 | "dump raw trace in ASCII"), | ||
1310 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), | ||
1311 | OPT_END() | ||
1312 | }; | ||
1313 | |||
1314 | static void setup_sorting(void) | ||
1315 | { | ||
1316 | char *tmp, *tok, *str = strdup(sort_order); | ||
1317 | |||
1318 | for (tok = strtok_r(str, ", ", &tmp); | ||
1319 | tok; tok = strtok_r(NULL, ", ", &tmp)) { | ||
1320 | if (sort_dimension__add(tok) < 0) { | ||
1321 | error("Unknown --sort key: `%s'", tok); | ||
1322 | usage_with_options(annotate_usage, options); | ||
1323 | } | ||
1324 | } | ||
1325 | |||
1326 | free(str); | ||
1327 | } | ||
1328 | |||
1329 | int cmd_annotate(int argc, const char **argv, const char *prefix) | ||
1330 | { | ||
1331 | symbol__init(); | ||
1332 | |||
1333 | page_size = getpagesize(); | ||
1334 | |||
1335 | argc = parse_options(argc, argv, options, annotate_usage, 0); | ||
1336 | |||
1337 | setup_sorting(); | ||
1338 | |||
1339 | if (argc) { | ||
1340 | /* | ||
1341 | * Special case: if there's an argument left then assume tha | ||
1342 | * it's a symbol filter: | ||
1343 | */ | ||
1344 | if (argc > 1) | ||
1345 | usage_with_options(annotate_usage, options); | ||
1346 | |||
1347 | sym_hist_filter = argv[0]; | ||
1348 | } | ||
1349 | |||
1350 | if (!sym_hist_filter) | ||
1351 | usage_with_options(annotate_usage, options); | ||
1352 | |||
1353 | setup_pager(); | ||
1354 | |||
1355 | return __cmd_annotate(); | ||
1356 | } | ||
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c new file mode 100644 index 000000000000..0f32dc3f3c4c --- /dev/null +++ b/tools/perf/builtin-help.c | |||
@@ -0,0 +1,461 @@ | |||
1 | /* | ||
2 | * builtin-help.c | ||
3 | * | ||
4 | * Builtin help command | ||
5 | */ | ||
6 | #include "util/cache.h" | ||
7 | #include "builtin.h" | ||
8 | #include "util/exec_cmd.h" | ||
9 | #include "common-cmds.h" | ||
10 | #include "util/parse-options.h" | ||
11 | #include "util/run-command.h" | ||
12 | #include "util/help.h" | ||
13 | |||
14 | static struct man_viewer_list { | ||
15 | struct man_viewer_list *next; | ||
16 | char name[FLEX_ARRAY]; | ||
17 | } *man_viewer_list; | ||
18 | |||
19 | static struct man_viewer_info_list { | ||
20 | struct man_viewer_info_list *next; | ||
21 | const char *info; | ||
22 | char name[FLEX_ARRAY]; | ||
23 | } *man_viewer_info_list; | ||
24 | |||
25 | enum help_format { | ||
26 | HELP_FORMAT_MAN, | ||
27 | HELP_FORMAT_INFO, | ||
28 | HELP_FORMAT_WEB, | ||
29 | }; | ||
30 | |||
31 | static int show_all = 0; | ||
32 | static enum help_format help_format = HELP_FORMAT_MAN; | ||
33 | static struct option builtin_help_options[] = { | ||
34 | OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), | ||
35 | OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), | ||
36 | OPT_SET_INT('w', "web", &help_format, "show manual in web browser", | ||
37 | HELP_FORMAT_WEB), | ||
38 | OPT_SET_INT('i', "info", &help_format, "show info page", | ||
39 | HELP_FORMAT_INFO), | ||
40 | OPT_END(), | ||
41 | }; | ||
42 | |||
43 | static const char * const builtin_help_usage[] = { | ||
44 | "perf help [--all] [--man|--web|--info] [command]", | ||
45 | NULL | ||
46 | }; | ||
47 | |||
48 | static enum help_format parse_help_format(const char *format) | ||
49 | { | ||
50 | if (!strcmp(format, "man")) | ||
51 | return HELP_FORMAT_MAN; | ||
52 | if (!strcmp(format, "info")) | ||
53 | return HELP_FORMAT_INFO; | ||
54 | if (!strcmp(format, "web") || !strcmp(format, "html")) | ||
55 | return HELP_FORMAT_WEB; | ||
56 | die("unrecognized help format '%s'", format); | ||
57 | } | ||
58 | |||
59 | static const char *get_man_viewer_info(const char *name) | ||
60 | { | ||
61 | struct man_viewer_info_list *viewer; | ||
62 | |||
63 | for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) | ||
64 | { | ||
65 | if (!strcasecmp(name, viewer->name)) | ||
66 | return viewer->info; | ||
67 | } | ||
68 | return NULL; | ||
69 | } | ||
70 | |||
71 | static int check_emacsclient_version(void) | ||
72 | { | ||
73 | struct strbuf buffer = STRBUF_INIT; | ||
74 | struct child_process ec_process; | ||
75 | const char *argv_ec[] = { "emacsclient", "--version", NULL }; | ||
76 | int version; | ||
77 | |||
78 | /* emacsclient prints its version number on stderr */ | ||
79 | memset(&ec_process, 0, sizeof(ec_process)); | ||
80 | ec_process.argv = argv_ec; | ||
81 | ec_process.err = -1; | ||
82 | ec_process.stdout_to_stderr = 1; | ||
83 | if (start_command(&ec_process)) { | ||
84 | fprintf(stderr, "Failed to start emacsclient.\n"); | ||
85 | return -1; | ||
86 | } | ||
87 | strbuf_read(&buffer, ec_process.err, 20); | ||
88 | close(ec_process.err); | ||
89 | |||
90 | /* | ||
91 | * Don't bother checking return value, because "emacsclient --version" | ||
92 | * seems to always exits with code 1. | ||
93 | */ | ||
94 | finish_command(&ec_process); | ||
95 | |||
96 | if (prefixcmp(buffer.buf, "emacsclient")) { | ||
97 | fprintf(stderr, "Failed to parse emacsclient version.\n"); | ||
98 | strbuf_release(&buffer); | ||
99 | return -1; | ||
100 | } | ||
101 | |||
102 | strbuf_remove(&buffer, 0, strlen("emacsclient")); | ||
103 | version = atoi(buffer.buf); | ||
104 | |||
105 | if (version < 22) { | ||
106 | fprintf(stderr, | ||
107 | "emacsclient version '%d' too old (< 22).\n", | ||
108 | version); | ||
109 | strbuf_release(&buffer); | ||
110 | return -1; | ||
111 | } | ||
112 | |||
113 | strbuf_release(&buffer); | ||
114 | return 0; | ||
115 | } | ||
116 | |||
117 | static void exec_woman_emacs(const char* path, const char *page) | ||
118 | { | ||
119 | if (!check_emacsclient_version()) { | ||
120 | /* This works only with emacsclient version >= 22. */ | ||
121 | struct strbuf man_page = STRBUF_INIT; | ||
122 | |||
123 | if (!path) | ||
124 | path = "emacsclient"; | ||
125 | strbuf_addf(&man_page, "(woman \"%s\")", page); | ||
126 | execlp(path, "emacsclient", "-e", man_page.buf, NULL); | ||
127 | warning("failed to exec '%s': %s", path, strerror(errno)); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | static void exec_man_konqueror(const char* path, const char *page) | ||
132 | { | ||
133 | const char *display = getenv("DISPLAY"); | ||
134 | if (display && *display) { | ||
135 | struct strbuf man_page = STRBUF_INIT; | ||
136 | const char *filename = "kfmclient"; | ||
137 | |||
138 | /* It's simpler to launch konqueror using kfmclient. */ | ||
139 | if (path) { | ||
140 | const char *file = strrchr(path, '/'); | ||
141 | if (file && !strcmp(file + 1, "konqueror")) { | ||
142 | char *new = strdup(path); | ||
143 | char *dest = strrchr(new, '/'); | ||
144 | |||
145 | /* strlen("konqueror") == strlen("kfmclient") */ | ||
146 | strcpy(dest + 1, "kfmclient"); | ||
147 | path = new; | ||
148 | } | ||
149 | if (file) | ||
150 | filename = file; | ||
151 | } else | ||
152 | path = "kfmclient"; | ||
153 | strbuf_addf(&man_page, "man:%s(1)", page); | ||
154 | execlp(path, filename, "newTab", man_page.buf, NULL); | ||
155 | warning("failed to exec '%s': %s", path, strerror(errno)); | ||
156 | } | ||
157 | } | ||
158 | |||
/*
 * Show @page with man(1); @path overrides the binary when non-NULL.
 * Only returns (after a warning) when the exec fails.
 */
static void exec_man_man(const char* path, const char *page)
{
	const char *binary = path ? path : "man";

	execlp(binary, "man", page, NULL);
	warning("failed to exec '%s': %s", binary, strerror(errno));
}
166 | |||
167 | static void exec_man_cmd(const char *cmd, const char *page) | ||
168 | { | ||
169 | struct strbuf shell_cmd = STRBUF_INIT; | ||
170 | strbuf_addf(&shell_cmd, "%s %s", cmd, page); | ||
171 | execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); | ||
172 | warning("failed to exec '%s': %s", cmd, strerror(errno)); | ||
173 | } | ||
174 | |||
175 | static void add_man_viewer(const char *name) | ||
176 | { | ||
177 | struct man_viewer_list **p = &man_viewer_list; | ||
178 | size_t len = strlen(name); | ||
179 | |||
180 | while (*p) | ||
181 | p = &((*p)->next); | ||
182 | *p = calloc(1, (sizeof(**p) + len + 1)); | ||
183 | strncpy((*p)->name, name, len); | ||
184 | } | ||
185 | |||
/* Case-insensitive test that the first @len bytes of @name are exactly @known. */
static int man_viewer_name_is(const char *known, const char *name, size_t len)
{
	return strlen(known) == len && !strncasecmp(known, name, len);
}

/*
 * Tell whether the first @len bytes of @name name a built-in man viewer
 * ("man", "woman" or "konqueror", compared case-insensitively).
 *
 * The length must match as well: comparing only @len bytes would accept
 * every proper prefix of a built-in name (and the empty name).
 *
 * Returns non-zero for a built-in viewer, 0 otherwise.
 */
static int supported_man_viewer(const char *name, size_t len)
{
	return (man_viewer_name_is("man", name, len) ||
		man_viewer_name_is("woman", name, len) ||
		man_viewer_name_is("konqueror", name, len));
}
192 | |||
193 | static void do_add_man_viewer_info(const char *name, | ||
194 | size_t len, | ||
195 | const char *value) | ||
196 | { | ||
197 | struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); | ||
198 | |||
199 | strncpy(new->name, name, len); | ||
200 | new->info = strdup(value); | ||
201 | new->next = man_viewer_info_list; | ||
202 | man_viewer_info_list = new; | ||
203 | } | ||
204 | |||
/*
 * Handle a "man.<tool>.path" setting: record @value for built-in viewers,
 * warn for any other tool.  Always returns 0.
 */
static int add_man_viewer_path(const char *name,
			       size_t len,
			       const char *value)
{
	if (!supported_man_viewer(name, len)) {
		warning("'%s': path for unsupported man viewer.\n"
			"Please consider using 'man.<tool>.cmd' instead.",
			name);
		return 0;
	}

	do_add_man_viewer_info(name, len, value);
	return 0;
}
218 | |||
/*
 * Handle a "man.<tool>.cmd" setting: record @value for custom tools, warn
 * when the tool is one of the built-in viewers.  Always returns 0.
 */
static int add_man_viewer_cmd(const char *name,
			      size_t len,
			      const char *value)
{
	if (!supported_man_viewer(name, len)) {
		do_add_man_viewer_info(name, len, value);
		return 0;
	}

	warning("'%s': cmd for supported man viewer.\n"
		"Please consider using 'man.<tool>.path' instead.",
		name);
	return 0;
}
232 | |||
/*
 * Dispatch a "man.<tool>.<subkey>" config entry.
 *
 * The first four bytes of @var (the "man." prefix checked by the caller)
 * are skipped; the text after the last '.' selects the handler.  Unknown
 * sub keys only produce a warning.
 */
static int add_man_viewer_info(const char *var, const char *value)
{
	const char *name = var + 4;
	const char *subkey = strrchr(name, '.');
	int is_path, is_cmd;

	if (!subkey)
		return error("Config with no key for man viewer: %s", name);

	is_path = !strcmp(subkey, ".path");
	is_cmd = !is_path && !strcmp(subkey, ".cmd");

	if (!is_path && !is_cmd) {
		warning("'%s': unsupported man viewer sub key.", subkey);
		return 0;
	}

	if (!value)
		return config_error_nonbool(var);

	if (is_path)
		return add_man_viewer_path(name, subkey - name, value);
	return add_man_viewer_cmd(name, subkey - name, value);
}
255 | |||
256 | static int perf_help_config(const char *var, const char *value, void *cb) | ||
257 | { | ||
258 | if (!strcmp(var, "help.format")) { | ||
259 | if (!value) | ||
260 | return config_error_nonbool(var); | ||
261 | help_format = parse_help_format(value); | ||
262 | return 0; | ||
263 | } | ||
264 | if (!strcmp(var, "man.viewer")) { | ||
265 | if (!value) | ||
266 | return config_error_nonbool(var); | ||
267 | add_man_viewer(value); | ||
268 | return 0; | ||
269 | } | ||
270 | if (!prefixcmp(var, "man.")) | ||
271 | return add_man_viewer_info(var, value); | ||
272 | |||
273 | return perf_default_config(var, value, cb); | ||
274 | } | ||
275 | |||
276 | static struct cmdnames main_cmds, other_cmds; | ||
277 | |||
278 | void list_common_cmds_help(void) | ||
279 | { | ||
280 | int i, longest = 0; | ||
281 | |||
282 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
283 | if (longest < strlen(common_cmds[i].name)) | ||
284 | longest = strlen(common_cmds[i].name); | ||
285 | } | ||
286 | |||
287 | puts(" The most commonly used perf commands are:"); | ||
288 | for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { | ||
289 | printf(" %s ", common_cmds[i].name); | ||
290 | mput_char(' ', longest - strlen(common_cmds[i].name)); | ||
291 | puts(common_cmds[i].help); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static int is_perf_command(const char *s) | ||
296 | { | ||
297 | return is_in_cmdlist(&main_cmds, s) || | ||
298 | is_in_cmdlist(&other_cmds, s); | ||
299 | } | ||
300 | |||
/*
 * Return a newly allocated string holding @prefix followed by @cmd.
 * The caller owns the result.
 */
static const char *prepend(const char *prefix, const char *cmd)
{
	size_t head_len = strlen(prefix);
	size_t tail_size = strlen(cmd) + 1;	/* includes the NUL */
	char *joined = malloc(head_len + tail_size);

	memcpy(joined, prefix, head_len);
	memcpy(joined + head_len, cmd, tail_size);
	return joined;
}
310 | |||
/*
 * Translate a command name into the name of its documentation page.
 *
 * NULL maps to "perf", names already starting with "perf" are returned
 * unchanged, and every other name gets a "perf-" prefix.  The result is
 * the same whether or not the name is a known perf command, so no lookup
 * is made.
 */
static const char *cmd_to_page(const char *perf_cmd)
{
	if (!perf_cmd)
		return "perf";
	if (!prefixcmp(perf_cmd, "perf"))
		return perf_cmd;
	return prepend("perf-", perf_cmd);
}
322 | |||
323 | static void setup_man_path(void) | ||
324 | { | ||
325 | struct strbuf new_path = STRBUF_INIT; | ||
326 | const char *old_path = getenv("MANPATH"); | ||
327 | |||
328 | /* We should always put ':' after our path. If there is no | ||
329 | * old_path, the ':' at the end will let 'man' to try | ||
330 | * system-wide paths after ours to find the manual page. If | ||
331 | * there is old_path, we need ':' as delimiter. */ | ||
332 | strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); | ||
333 | strbuf_addch(&new_path, ':'); | ||
334 | if (old_path) | ||
335 | strbuf_addstr(&new_path, old_path); | ||
336 | |||
337 | setenv("MANPATH", new_path.buf, 1); | ||
338 | |||
339 | strbuf_release(&new_path); | ||
340 | } | ||
341 | |||
/*
 * Try to display @page with the viewer called @name.
 *
 * Built-in viewers receive the registered info string as their path;
 * any other viewer needs a registered info string, which is run as a
 * command.  Warns when the viewer is neither built-in nor configured.
 */
static void exec_viewer(const char *name, const char *page)
{
	const char *info = get_man_viewer_info(name);

	if (!strcasecmp(name, "man")) {
		exec_man_man(info, page);
		return;
	}
	if (!strcasecmp(name, "woman")) {
		exec_woman_emacs(info, page);
		return;
	}
	if (!strcasecmp(name, "konqueror")) {
		exec_man_konqueror(info, page);
		return;
	}

	if (info)
		exec_man_cmd(info, page);
	else
		warning("'%s': unknown man viewer.", name);
}
357 | |||
358 | static void show_man_page(const char *perf_cmd) | ||
359 | { | ||
360 | struct man_viewer_list *viewer; | ||
361 | const char *page = cmd_to_page(perf_cmd); | ||
362 | const char *fallback = getenv("PERF_MAN_VIEWER"); | ||
363 | |||
364 | setup_man_path(); | ||
365 | for (viewer = man_viewer_list; viewer; viewer = viewer->next) | ||
366 | { | ||
367 | exec_viewer(viewer->name, page); /* will return when unable */ | ||
368 | } | ||
369 | if (fallback) | ||
370 | exec_viewer(fallback, page); | ||
371 | exec_viewer("man", page); | ||
372 | die("no man viewer handled the request"); | ||
373 | } | ||
374 | |||
375 | static void show_info_page(const char *perf_cmd) | ||
376 | { | ||
377 | const char *page = cmd_to_page(perf_cmd); | ||
378 | setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); | ||
379 | execlp("info", "info", "perfman", page, NULL); | ||
380 | } | ||
381 | |||
382 | static void get_html_page_path(struct strbuf *page_path, const char *page) | ||
383 | { | ||
384 | struct stat st; | ||
385 | const char *html_path = system_path(PERF_HTML_PATH); | ||
386 | |||
387 | /* Check that we have a perf documentation directory. */ | ||
388 | if (stat(mkpath("%s/perf.html", html_path), &st) | ||
389 | || !S_ISREG(st.st_mode)) | ||
390 | die("'%s': not a documentation directory.", html_path); | ||
391 | |||
392 | strbuf_init(page_path, 0); | ||
393 | strbuf_addf(page_path, "%s/%s.html", html_path, page); | ||
394 | } | ||
395 | |||
/*
 * Default HTML opener: hand @path to the web--browse script.  Only
 * compiled when open_html is not already defined in a platform-specific
 * way (see for example compat/mingw.h).
 */
#ifndef open_html
static void open_html(const char *path)
{
	execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL);
}
#endif
407 | |||
408 | static void show_html_page(const char *perf_cmd) | ||
409 | { | ||
410 | const char *page = cmd_to_page(perf_cmd); | ||
411 | struct strbuf page_path; /* it leaks but we exec bellow */ | ||
412 | |||
413 | get_html_page_path(&page_path, page); | ||
414 | |||
415 | open_html(page_path.buf); | ||
416 | } | ||
417 | |||
418 | int cmd_help(int argc, const char **argv, const char *prefix) | ||
419 | { | ||
420 | const char *alias; | ||
421 | load_command_list("perf-", &main_cmds, &other_cmds); | ||
422 | |||
423 | perf_config(perf_help_config, NULL); | ||
424 | |||
425 | argc = parse_options(argc, argv, builtin_help_options, | ||
426 | builtin_help_usage, 0); | ||
427 | |||
428 | if (show_all) { | ||
429 | printf("\n usage: %s\n\n", perf_usage_string); | ||
430 | list_commands("perf commands", &main_cmds, &other_cmds); | ||
431 | printf(" %s\n\n", perf_more_info_string); | ||
432 | return 0; | ||
433 | } | ||
434 | |||
435 | if (!argv[0]) { | ||
436 | printf("\n usage: %s\n\n", perf_usage_string); | ||
437 | list_common_cmds_help(); | ||
438 | printf("\n %s\n\n", perf_more_info_string); | ||
439 | return 0; | ||
440 | } | ||
441 | |||
442 | alias = alias_lookup(argv[0]); | ||
443 | if (alias && !is_perf_command(argv[0])) { | ||
444 | printf("`perf %s' is aliased to `%s'\n", argv[0], alias); | ||
445 | return 0; | ||
446 | } | ||
447 | |||
448 | switch (help_format) { | ||
449 | case HELP_FORMAT_MAN: | ||
450 | show_man_page(argv[0]); | ||
451 | break; | ||
452 | case HELP_FORMAT_INFO: | ||
453 | show_info_page(argv[0]); | ||
454 | break; | ||
455 | case HELP_FORMAT_WEB: | ||
456 | show_html_page(argv[0]); | ||
457 | break; | ||
458 | } | ||
459 | |||
460 | return 0; | ||
461 | } | ||
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c new file mode 100644 index 000000000000..fe60e37c96ef --- /dev/null +++ b/tools/perf/builtin-list.c | |||
@@ -0,0 +1,20 @@ | |||
1 | /* | ||
2 | * builtin-list.c | ||
3 | * | ||
4 | * Builtin list command: list all event types | ||
5 | * | ||
6 | * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de> | ||
7 | * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
8 | */ | ||
9 | #include "builtin.h" | ||
10 | |||
11 | #include "perf.h" | ||
12 | |||
13 | #include "util/parse-options.h" | ||
14 | #include "util/parse-events.h" | ||
15 | |||
/*
 * Entry point of 'perf list': prints the events through print_events().
 * The arguments are not used.  Always returns 0.
 */
int cmd_list(int argc, const char **argv, const char *prefix)
{
	print_events();

	return 0;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c new file mode 100644 index 000000000000..29259e74dcfa --- /dev/null +++ b/tools/perf/builtin-record.c | |||
@@ -0,0 +1,582 @@ | |||
1 | /* | ||
2 | * builtin-record.c | ||
3 | * | ||
4 | * Builtin record command: Record the profile of a workload | ||
5 | * (or a CPU, or a PID) into the perf.data output file - for | ||
6 | * later analysis via perf report. | ||
7 | */ | ||
8 | #include "builtin.h" | ||
9 | |||
10 | #include "perf.h" | ||
11 | |||
12 | #include "util/util.h" | ||
13 | #include "util/parse-options.h" | ||
14 | #include "util/parse-events.h" | ||
15 | #include "util/string.h" | ||
16 | |||
17 | #include <unistd.h> | ||
18 | #include <sched.h> | ||
19 | |||
20 | #define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) | ||
21 | #define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) | ||
22 | |||
23 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
24 | |||
25 | static long default_interval = 100000; | ||
26 | |||
27 | static int nr_cpus = 0; | ||
28 | static unsigned int page_size; | ||
29 | static unsigned int mmap_pages = 128; | ||
30 | static int freq = 0; | ||
31 | static int output; | ||
32 | static const char *output_name = "perf.data"; | ||
33 | static int group = 0; | ||
34 | static unsigned int realtime_prio = 0; | ||
35 | static int system_wide = 0; | ||
36 | static pid_t target_pid = -1; | ||
37 | static int inherit = 1; | ||
38 | static int force = 0; | ||
39 | static int append_file = 0; | ||
40 | static int verbose = 0; | ||
41 | |||
42 | static long samples; | ||
43 | static struct timeval last_read; | ||
44 | static struct timeval this_read; | ||
45 | |||
46 | static __u64 bytes_written; | ||
47 | |||
48 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | ||
49 | |||
50 | static int nr_poll; | ||
51 | static int nr_cpu; | ||
52 | |||
53 | struct mmap_event { | ||
54 | struct perf_event_header header; | ||
55 | __u32 pid; | ||
56 | __u32 tid; | ||
57 | __u64 start; | ||
58 | __u64 len; | ||
59 | __u64 pgoff; | ||
60 | char filename[PATH_MAX]; | ||
61 | }; | ||
62 | |||
63 | struct comm_event { | ||
64 | struct perf_event_header header; | ||
65 | __u32 pid; | ||
66 | __u32 tid; | ||
67 | char comm[16]; | ||
68 | }; | ||
69 | |||
70 | |||
71 | struct mmap_data { | ||
72 | int counter; | ||
73 | void *base; | ||
74 | unsigned int mask; | ||
75 | unsigned int prev; | ||
76 | }; | ||
77 | |||
78 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | ||
79 | |||
80 | static unsigned int mmap_read_head(struct mmap_data *md) | ||
81 | { | ||
82 | struct perf_counter_mmap_page *pc = md->base; | ||
83 | int head; | ||
84 | |||
85 | head = pc->data_head; | ||
86 | rmb(); | ||
87 | |||
88 | return head; | ||
89 | } | ||
90 | |||
91 | static void mmap_read(struct mmap_data *md) | ||
92 | { | ||
93 | unsigned int head = mmap_read_head(md); | ||
94 | unsigned int old = md->prev; | ||
95 | unsigned char *data = md->base + page_size; | ||
96 | unsigned long size; | ||
97 | void *buf; | ||
98 | int diff; | ||
99 | |||
100 | gettimeofday(&this_read, NULL); | ||
101 | |||
102 | /* | ||
103 | * If we're further behind than half the buffer, there's a chance | ||
104 | * the writer will bite our tail and mess up the samples under us. | ||
105 | * | ||
106 | * If we somehow ended up ahead of the head, we got messed up. | ||
107 | * | ||
108 | * In either case, truncate and restart at head. | ||
109 | */ | ||
110 | diff = head - old; | ||
111 | if (diff > md->mask / 2 || diff < 0) { | ||
112 | struct timeval iv; | ||
113 | unsigned long msecs; | ||
114 | |||
115 | timersub(&this_read, &last_read, &iv); | ||
116 | msecs = iv.tv_sec*1000 + iv.tv_usec/1000; | ||
117 | |||
118 | fprintf(stderr, "WARNING: failed to keep up with mmap data." | ||
119 | " Last read %lu msecs ago.\n", msecs); | ||
120 | |||
121 | /* | ||
122 | * head points to a known good entry, start there. | ||
123 | */ | ||
124 | old = head; | ||
125 | } | ||
126 | |||
127 | last_read = this_read; | ||
128 | |||
129 | if (old != head) | ||
130 | samples++; | ||
131 | |||
132 | size = head - old; | ||
133 | |||
134 | if ((old & md->mask) + size != (head & md->mask)) { | ||
135 | buf = &data[old & md->mask]; | ||
136 | size = md->mask + 1 - (old & md->mask); | ||
137 | old += size; | ||
138 | |||
139 | while (size) { | ||
140 | int ret = write(output, buf, size); | ||
141 | |||
142 | if (ret < 0) | ||
143 | die("failed to write"); | ||
144 | |||
145 | size -= ret; | ||
146 | buf += ret; | ||
147 | |||
148 | bytes_written += ret; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | buf = &data[old & md->mask]; | ||
153 | size = head - old; | ||
154 | old += size; | ||
155 | |||
156 | while (size) { | ||
157 | int ret = write(output, buf, size); | ||
158 | |||
159 | if (ret < 0) | ||
160 | die("failed to write"); | ||
161 | |||
162 | size -= ret; | ||
163 | buf += ret; | ||
164 | |||
165 | bytes_written += ret; | ||
166 | } | ||
167 | |||
168 | md->prev = old; | ||
169 | } | ||
170 | |||
/* Set to 1 by sig_handler(). */
static volatile int done;
/* Number of the signal last seen by sig_handler(), -1 while none arrived. */
static volatile int signr = -1;

/* Signal handler: flags completion and records which signal arrived. */
static void sig_handler(int sig)
{
	done = 1;
	signr = sig;
}
179 | |||
180 | static void sig_atexit(void) | ||
181 | { | ||
182 | if (signr == -1) | ||
183 | return; | ||
184 | |||
185 | signal(signr, SIG_DFL); | ||
186 | kill(getpid(), signr); | ||
187 | } | ||
188 | |||
189 | static void pid_synthesize_comm_event(pid_t pid, int full) | ||
190 | { | ||
191 | struct comm_event comm_ev; | ||
192 | char filename[PATH_MAX]; | ||
193 | char bf[BUFSIZ]; | ||
194 | int fd, ret; | ||
195 | size_t size; | ||
196 | char *field, *sep; | ||
197 | DIR *tasks; | ||
198 | struct dirent dirent, *next; | ||
199 | |||
200 | snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); | ||
201 | |||
202 | fd = open(filename, O_RDONLY); | ||
203 | if (fd < 0) { | ||
204 | fprintf(stderr, "couldn't open %s\n", filename); | ||
205 | exit(EXIT_FAILURE); | ||
206 | } | ||
207 | if (read(fd, bf, sizeof(bf)) < 0) { | ||
208 | fprintf(stderr, "couldn't read %s\n", filename); | ||
209 | exit(EXIT_FAILURE); | ||
210 | } | ||
211 | close(fd); | ||
212 | |||
213 | /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */ | ||
214 | memset(&comm_ev, 0, sizeof(comm_ev)); | ||
215 | field = strchr(bf, '('); | ||
216 | if (field == NULL) | ||
217 | goto out_failure; | ||
218 | sep = strchr(++field, ')'); | ||
219 | if (sep == NULL) | ||
220 | goto out_failure; | ||
221 | size = sep - field; | ||
222 | memcpy(comm_ev.comm, field, size++); | ||
223 | |||
224 | comm_ev.pid = pid; | ||
225 | comm_ev.header.type = PERF_EVENT_COMM; | ||
226 | size = ALIGN(size, sizeof(__u64)); | ||
227 | comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); | ||
228 | |||
229 | if (!full) { | ||
230 | comm_ev.tid = pid; | ||
231 | |||
232 | ret = write(output, &comm_ev, comm_ev.header.size); | ||
233 | if (ret < 0) { | ||
234 | perror("failed to write"); | ||
235 | exit(-1); | ||
236 | } | ||
237 | return; | ||
238 | } | ||
239 | |||
240 | snprintf(filename, sizeof(filename), "/proc/%d/task", pid); | ||
241 | |||
242 | tasks = opendir(filename); | ||
243 | while (!readdir_r(tasks, &dirent, &next) && next) { | ||
244 | char *end; | ||
245 | pid = strtol(dirent.d_name, &end, 10); | ||
246 | if (*end) | ||
247 | continue; | ||
248 | |||
249 | comm_ev.tid = pid; | ||
250 | |||
251 | ret = write(output, &comm_ev, comm_ev.header.size); | ||
252 | if (ret < 0) { | ||
253 | perror("failed to write"); | ||
254 | exit(-1); | ||
255 | } | ||
256 | } | ||
257 | closedir(tasks); | ||
258 | return; | ||
259 | |||
260 | out_failure: | ||
261 | fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", | ||
262 | filename); | ||
263 | exit(EXIT_FAILURE); | ||
264 | } | ||
265 | |||
266 | static void pid_synthesize_mmap_samples(pid_t pid) | ||
267 | { | ||
268 | char filename[PATH_MAX]; | ||
269 | FILE *fp; | ||
270 | |||
271 | snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); | ||
272 | |||
273 | fp = fopen(filename, "r"); | ||
274 | if (fp == NULL) { | ||
275 | fprintf(stderr, "couldn't open %s\n", filename); | ||
276 | exit(EXIT_FAILURE); | ||
277 | } | ||
278 | while (1) { | ||
279 | char bf[BUFSIZ], *pbf = bf; | ||
280 | struct mmap_event mmap_ev = { | ||
281 | .header.type = PERF_EVENT_MMAP, | ||
282 | }; | ||
283 | int n; | ||
284 | size_t size; | ||
285 | if (fgets(bf, sizeof(bf), fp) == NULL) | ||
286 | break; | ||
287 | |||
288 | /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ | ||
289 | n = hex2u64(pbf, &mmap_ev.start); | ||
290 | if (n < 0) | ||
291 | continue; | ||
292 | pbf += n + 1; | ||
293 | n = hex2u64(pbf, &mmap_ev.len); | ||
294 | if (n < 0) | ||
295 | continue; | ||
296 | pbf += n + 3; | ||
297 | if (*pbf == 'x') { /* vm_exec */ | ||
298 | char *execname = strrchr(bf, ' '); | ||
299 | |||
300 | if (execname == NULL || execname[1] != '/') | ||
301 | continue; | ||
302 | |||
303 | execname += 1; | ||
304 | size = strlen(execname); | ||
305 | execname[size - 1] = '\0'; /* Remove \n */ | ||
306 | memcpy(mmap_ev.filename, execname, size); | ||
307 | size = ALIGN(size, sizeof(__u64)); | ||
308 | mmap_ev.len -= mmap_ev.start; | ||
309 | mmap_ev.header.size = (sizeof(mmap_ev) - | ||
310 | (sizeof(mmap_ev.filename) - size)); | ||
311 | mmap_ev.pid = pid; | ||
312 | mmap_ev.tid = pid; | ||
313 | |||
314 | if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { | ||
315 | perror("failed to write"); | ||
316 | exit(-1); | ||
317 | } | ||
318 | } | ||
319 | } | ||
320 | |||
321 | fclose(fp); | ||
322 | } | ||
323 | |||
/*
 * Walk /proc and synthesize comm and mmap events for every process that
 * already exists, i.e. every directory entry with a purely numeric name.
 * Exits the program if /proc cannot be opened.
 */
static void synthesize_samples(void)
{
	DIR *proc;
	struct dirent dirent, *next;

	proc = opendir("/proc");
	if (proc == NULL) {
		/* readdir_r()/closedir() must not be handed a NULL stream */
		perror("failed to open /proc");
		exit(EXIT_FAILURE);
	}

	while (!readdir_r(proc, &dirent, &next) && next) {
		char *end;
		pid_t pid;

		pid = strtol(dirent.d_name, &end, 10);
		if (*end) /* only interested in proper numerical dirents */
			continue;

		pid_synthesize_comm_event(pid, 1);
		pid_synthesize_mmap_samples(pid);
	}

	closedir(proc);
}
345 | |||
346 | static int group_fd; | ||
347 | |||
348 | static void create_counter(int counter, int cpu, pid_t pid) | ||
349 | { | ||
350 | struct perf_counter_attr *attr = attrs + counter; | ||
351 | int track = 1; | ||
352 | |||
353 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | ||
354 | if (freq) { | ||
355 | attr->sample_type |= PERF_SAMPLE_PERIOD; | ||
356 | attr->freq = 1; | ||
357 | attr->sample_freq = freq; | ||
358 | } | ||
359 | attr->mmap = track; | ||
360 | attr->comm = track; | ||
361 | attr->inherit = (cpu < 0) && inherit; | ||
362 | attr->disabled = 1; | ||
363 | |||
364 | track = 0; /* only the first counter needs these */ | ||
365 | |||
366 | try_again: | ||
367 | fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); | ||
368 | |||
369 | if (fd[nr_cpu][counter] < 0) { | ||
370 | int err = errno; | ||
371 | |||
372 | if (err == EPERM) | ||
373 | die("Permission error - are you root?\n"); | ||
374 | |||
375 | /* | ||
376 | * If it's cycles then fall back to hrtimer | ||
377 | * based cpu-clock-tick sw counter, which | ||
378 | * is always available even if no PMU support: | ||
379 | */ | ||
380 | if (attr->type == PERF_TYPE_HARDWARE | ||
381 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | ||
382 | |||
383 | if (verbose) | ||
384 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | ||
385 | attr->type = PERF_TYPE_SOFTWARE; | ||
386 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | ||
387 | goto try_again; | ||
388 | } | ||
389 | printf("\n"); | ||
390 | error("perfcounter syscall returned with %d (%s)\n", | ||
391 | fd[nr_cpu][counter], strerror(err)); | ||
392 | die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); | ||
393 | exit(-1); | ||
394 | } | ||
395 | |||
396 | assert(fd[nr_cpu][counter] >= 0); | ||
397 | fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); | ||
398 | |||
399 | /* | ||
400 | * First counter acts as the group leader: | ||
401 | */ | ||
402 | if (group && group_fd == -1) | ||
403 | group_fd = fd[nr_cpu][counter]; | ||
404 | |||
405 | event_array[nr_poll].fd = fd[nr_cpu][counter]; | ||
406 | event_array[nr_poll].events = POLLIN; | ||
407 | nr_poll++; | ||
408 | |||
409 | mmap_array[nr_cpu][counter].counter = counter; | ||
410 | mmap_array[nr_cpu][counter].prev = 0; | ||
411 | mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; | ||
412 | mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
413 | PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); | ||
414 | if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { | ||
415 | error("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
416 | exit(-1); | ||
417 | } | ||
418 | |||
419 | ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); | ||
420 | } | ||
421 | |||
422 | static void open_counters(int cpu, pid_t pid) | ||
423 | { | ||
424 | int counter; | ||
425 | |||
426 | if (pid > 0) { | ||
427 | pid_synthesize_comm_event(pid, 0); | ||
428 | pid_synthesize_mmap_samples(pid); | ||
429 | } | ||
430 | |||
431 | group_fd = -1; | ||
432 | for (counter = 0; counter < nr_counters; counter++) | ||
433 | create_counter(counter, cpu, pid); | ||
434 | |||
435 | nr_cpu++; | ||
436 | } | ||
437 | |||
438 | static int __cmd_record(int argc, const char **argv) | ||
439 | { | ||
440 | int i, counter; | ||
441 | struct stat st; | ||
442 | pid_t pid; | ||
443 | int flags; | ||
444 | int ret; | ||
445 | |||
446 | page_size = sysconf(_SC_PAGE_SIZE); | ||
447 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
448 | assert(nr_cpus <= MAX_NR_CPUS); | ||
449 | assert(nr_cpus >= 0); | ||
450 | |||
451 | if (!stat(output_name, &st) && !force && !append_file) { | ||
452 | fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", | ||
453 | output_name); | ||
454 | exit(-1); | ||
455 | } | ||
456 | |||
457 | flags = O_CREAT|O_RDWR; | ||
458 | if (append_file) | ||
459 | flags |= O_APPEND; | ||
460 | else | ||
461 | flags |= O_TRUNC; | ||
462 | |||
463 | output = open(output_name, flags, S_IRUSR|S_IWUSR); | ||
464 | if (output < 0) { | ||
465 | perror("failed to create output file"); | ||
466 | exit(-1); | ||
467 | } | ||
468 | |||
469 | if (!system_wide) { | ||
470 | open_counters(-1, target_pid != -1 ? target_pid : getpid()); | ||
471 | } else for (i = 0; i < nr_cpus; i++) | ||
472 | open_counters(i, target_pid); | ||
473 | |||
474 | atexit(sig_atexit); | ||
475 | signal(SIGCHLD, sig_handler); | ||
476 | signal(SIGINT, sig_handler); | ||
477 | |||
478 | if (target_pid == -1 && argc) { | ||
479 | pid = fork(); | ||
480 | if (pid < 0) | ||
481 | perror("failed to fork"); | ||
482 | |||
483 | if (!pid) { | ||
484 | if (execvp(argv[0], (char **)argv)) { | ||
485 | perror(argv[0]); | ||
486 | exit(-1); | ||
487 | } | ||
488 | } | ||
489 | } | ||
490 | |||
491 | if (realtime_prio) { | ||
492 | struct sched_param param; | ||
493 | |||
494 | param.sched_priority = realtime_prio; | ||
495 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | ||
496 | printf("Could not set realtime priority.\n"); | ||
497 | exit(-1); | ||
498 | } | ||
499 | } | ||
500 | |||
501 | if (system_wide) | ||
502 | synthesize_samples(); | ||
503 | |||
504 | while (!done) { | ||
505 | int hits = samples; | ||
506 | |||
507 | for (i = 0; i < nr_cpu; i++) { | ||
508 | for (counter = 0; counter < nr_counters; counter++) | ||
509 | mmap_read(&mmap_array[i][counter]); | ||
510 | } | ||
511 | |||
512 | if (hits == samples) | ||
513 | ret = poll(event_array, nr_poll, 100); | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * Approximate RIP event size: 24 bytes. | ||
518 | */ | ||
519 | fprintf(stderr, | ||
520 | "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n", | ||
521 | (double)bytes_written / 1024.0 / 1024.0, | ||
522 | output_name, | ||
523 | bytes_written / 24); | ||
524 | |||
525 | return 0; | ||
526 | } | ||
527 | |||
/* NULL-terminated usage lines handed to the option parser. */
static const char * const record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL,
};
533 | |||
534 | static const struct option options[] = { | ||
535 | OPT_CALLBACK('e', "event", NULL, "event", | ||
536 | "event selector. use 'perf list' to list available events", | ||
537 | parse_events), | ||
538 | OPT_INTEGER('p', "pid", &target_pid, | ||
539 | "record events on existing pid"), | ||
540 | OPT_INTEGER('r', "realtime", &realtime_prio, | ||
541 | "collect data with this RT SCHED_FIFO priority"), | ||
542 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | ||
543 | "system-wide collection from all CPUs"), | ||
544 | OPT_BOOLEAN('A', "append", &append_file, | ||
545 | "append to the output file to do incremental profiling"), | ||
546 | OPT_BOOLEAN('f', "force", &force, | ||
547 | "overwrite existing data file"), | ||
548 | OPT_LONG('c', "count", &default_interval, | ||
549 | "event period to sample"), | ||
550 | OPT_STRING('o', "output", &output_name, "file", | ||
551 | "output file name"), | ||
552 | OPT_BOOLEAN('i', "inherit", &inherit, | ||
553 | "child tasks inherit counters"), | ||
554 | OPT_INTEGER('F', "freq", &freq, | ||
555 | "profile at this frequency"), | ||
556 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, | ||
557 | "number of mmap data pages"), | ||
558 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
559 | "be more verbose (show counter open errors, etc)"), | ||
560 | OPT_END() | ||
561 | }; | ||
562 | |||
563 | int cmd_record(int argc, const char **argv, const char *prefix) | ||
564 | { | ||
565 | int counter; | ||
566 | |||
567 | argc = parse_options(argc, argv, options, record_usage, 0); | ||
568 | if (!argc && target_pid == -1 && !system_wide) | ||
569 | usage_with_options(record_usage, options); | ||
570 | |||
571 | if (!nr_counters) | ||
572 | nr_counters = 1; | ||
573 | |||
574 | for (counter = 0; counter < nr_counters; counter++) { | ||
575 | if (attrs[counter].sample_period) | ||
576 | continue; | ||
577 | |||
578 | attrs[counter].sample_period = default_interval; | ||
579 | } | ||
580 | |||
581 | return __cmd_record(argc, argv); | ||
582 | } | ||
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c new file mode 100644 index 000000000000..82fa93b4db99 --- /dev/null +++ b/tools/perf/builtin-report.c | |||
@@ -0,0 +1,1316 @@ | |||
1 | /* | ||
2 | * builtin-report.c | ||
3 | * | ||
4 | * Builtin report command: Analyze the perf.data input file, | ||
5 | * look up and read DSOs and symbol information and display | ||
6 | * a histogram of results, along various sorting keys. | ||
7 | */ | ||
8 | #include "builtin.h" | ||
9 | |||
10 | #include "util/util.h" | ||
11 | |||
12 | #include "util/color.h" | ||
13 | #include "util/list.h" | ||
14 | #include "util/cache.h" | ||
15 | #include "util/rbtree.h" | ||
16 | #include "util/symbol.h" | ||
17 | #include "util/string.h" | ||
18 | |||
19 | #include "perf.h" | ||
20 | |||
21 | #include "util/parse-options.h" | ||
22 | #include "util/parse-events.h" | ||
23 | |||
/* Bits of show_mask. */
#define SHOW_KERNEL	1
#define SHOW_USER	2
#define SHOW_HV		4

/* Input file name and optional vmlinux path. */
static char const *input_name = "perf.data";
static char *vmlinux;

/* Sort keys; sort_order starts out pointing at the default. */
static char default_sort_order[] = "comm,dso";
static char *sort_order = default_sort_order;

static int input;
static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;

/* dprintf() only prints while dump_trace is set. */
static int dump_trace;
#define dprintf(x...)	do { if (dump_trace) printf(x); } while (0)

static int verbose;
static int full_paths;

static unsigned long page_size;
static unsigned long mmap_window = 32;
45 | |||
46 | struct ip_event { | ||
47 | struct perf_event_header header; | ||
48 | __u64 ip; | ||
49 | __u32 pid, tid; | ||
50 | __u64 period; | ||
51 | }; | ||
52 | |||
53 | struct mmap_event { | ||
54 | struct perf_event_header header; | ||
55 | __u32 pid, tid; | ||
56 | __u64 start; | ||
57 | __u64 len; | ||
58 | __u64 pgoff; | ||
59 | char filename[PATH_MAX]; | ||
60 | }; | ||
61 | |||
62 | struct comm_event { | ||
63 | struct perf_event_header header; | ||
64 | __u32 pid, tid; | ||
65 | char comm[16]; | ||
66 | }; | ||
67 | |||
68 | struct fork_event { | ||
69 | struct perf_event_header header; | ||
70 | __u32 pid, ppid; | ||
71 | }; | ||
72 | |||
73 | struct period_event { | ||
74 | struct perf_event_header header; | ||
75 | __u64 time; | ||
76 | __u64 id; | ||
77 | __u64 sample_period; | ||
78 | }; | ||
79 | |||
80 | typedef union event_union { | ||
81 | struct perf_event_header header; | ||
82 | struct ip_event ip; | ||
83 | struct mmap_event mmap; | ||
84 | struct comm_event comm; | ||
85 | struct fork_event fork; | ||
86 | struct period_event period; | ||
87 | } event_t; | ||
88 | |||
89 | static LIST_HEAD(dsos); | ||
90 | static struct dso *kernel_dso; | ||
91 | static struct dso *vdso; | ||
92 | |||
93 | static void dsos__add(struct dso *dso) | ||
94 | { | ||
95 | list_add_tail(&dso->node, &dsos); | ||
96 | } | ||
97 | |||
98 | static struct dso *dsos__find(const char *name) | ||
99 | { | ||
100 | struct dso *pos; | ||
101 | |||
102 | list_for_each_entry(pos, &dsos, node) | ||
103 | if (strcmp(pos->name, name) == 0) | ||
104 | return pos; | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | static struct dso *dsos__findnew(const char *name) | ||
109 | { | ||
110 | struct dso *dso = dsos__find(name); | ||
111 | int nr; | ||
112 | |||
113 | if (dso) | ||
114 | return dso; | ||
115 | |||
116 | dso = dso__new(name, 0); | ||
117 | if (!dso) | ||
118 | goto out_delete_dso; | ||
119 | |||
120 | nr = dso__load(dso, NULL, verbose); | ||
121 | if (nr < 0) { | ||
122 | if (verbose) | ||
123 | fprintf(stderr, "Failed to open: %s\n", name); | ||
124 | goto out_delete_dso; | ||
125 | } | ||
126 | if (!nr && verbose) { | ||
127 | fprintf(stderr, | ||
128 | "No symbols found in: %s, maybe install a debug package?\n", | ||
129 | name); | ||
130 | } | ||
131 | |||
132 | dsos__add(dso); | ||
133 | |||
134 | return dso; | ||
135 | |||
136 | out_delete_dso: | ||
137 | dso__delete(dso); | ||
138 | return NULL; | ||
139 | } | ||
140 | |||
141 | static void dsos__fprintf(FILE *fp) | ||
142 | { | ||
143 | struct dso *pos; | ||
144 | |||
145 | list_for_each_entry(pos, &dsos, node) | ||
146 | dso__fprintf(pos, fp); | ||
147 | } | ||
148 | |||
149 | static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) | ||
150 | { | ||
151 | return dso__find_symbol(kernel_dso, ip); | ||
152 | } | ||
153 | |||
154 | static int load_kernel(void) | ||
155 | { | ||
156 | int err; | ||
157 | |||
158 | kernel_dso = dso__new("[kernel]", 0); | ||
159 | if (!kernel_dso) | ||
160 | return -1; | ||
161 | |||
162 | err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); | ||
163 | if (err) { | ||
164 | dso__delete(kernel_dso); | ||
165 | kernel_dso = NULL; | ||
166 | } else | ||
167 | dsos__add(kernel_dso); | ||
168 | |||
169 | vdso = dso__new("[vdso]", 0); | ||
170 | if (!vdso) | ||
171 | return -1; | ||
172 | |||
173 | vdso->find_symbol = vdso__find_symbol; | ||
174 | |||
175 | dsos__add(vdso); | ||
176 | |||
177 | return err; | ||
178 | } | ||
179 | |||
180 | static char __cwd[PATH_MAX]; | ||
181 | static char *cwd = __cwd; | ||
182 | static int cwdlen; | ||
183 | |||
184 | static int strcommon(const char *pathname) | ||
185 | { | ||
186 | int n = 0; | ||
187 | |||
188 | while (pathname[n] == cwd[n] && n < cwdlen) | ||
189 | ++n; | ||
190 | |||
191 | return n; | ||
192 | } | ||
193 | |||
194 | struct map { | ||
195 | struct list_head node; | ||
196 | __u64 start; | ||
197 | __u64 end; | ||
198 | __u64 pgoff; | ||
199 | __u64 (*map_ip)(struct map *, __u64); | ||
200 | struct dso *dso; | ||
201 | }; | ||
202 | |||
203 | static __u64 map__map_ip(struct map *map, __u64 ip) | ||
204 | { | ||
205 | return ip - map->start + map->pgoff; | ||
206 | } | ||
207 | |||
208 | static __u64 vdso__map_ip(struct map *map, __u64 ip) | ||
209 | { | ||
210 | return ip; | ||
211 | } | ||
212 | |||
/* Return 1 when @filename is exactly "//anon", else 0. */
static inline int is_anon_memory(const char *filename)
{
	return !strcmp(filename, "//anon");
}
217 | |||
218 | static struct map *map__new(struct mmap_event *event) | ||
219 | { | ||
220 | struct map *self = malloc(sizeof(*self)); | ||
221 | |||
222 | if (self != NULL) { | ||
223 | const char *filename = event->filename; | ||
224 | char newfilename[PATH_MAX]; | ||
225 | int anon; | ||
226 | |||
227 | if (cwd) { | ||
228 | int n = strcommon(filename); | ||
229 | |||
230 | if (n == cwdlen) { | ||
231 | snprintf(newfilename, sizeof(newfilename), | ||
232 | ".%s", filename + n); | ||
233 | filename = newfilename; | ||
234 | } | ||
235 | } | ||
236 | |||
237 | anon = is_anon_memory(filename); | ||
238 | |||
239 | if (anon) { | ||
240 | snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); | ||
241 | filename = newfilename; | ||
242 | } | ||
243 | |||
244 | self->start = event->start; | ||
245 | self->end = event->start + event->len; | ||
246 | self->pgoff = event->pgoff; | ||
247 | |||
248 | self->dso = dsos__findnew(filename); | ||
249 | if (self->dso == NULL) | ||
250 | goto out_delete; | ||
251 | |||
252 | if (self->dso == vdso || anon) | ||
253 | self->map_ip = vdso__map_ip; | ||
254 | else | ||
255 | self->map_ip = map__map_ip; | ||
256 | } | ||
257 | return self; | ||
258 | out_delete: | ||
259 | free(self); | ||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | static struct map *map__clone(struct map *self) | ||
264 | { | ||
265 | struct map *map = malloc(sizeof(*self)); | ||
266 | |||
267 | if (!map) | ||
268 | return NULL; | ||
269 | |||
270 | memcpy(map, self, sizeof(*self)); | ||
271 | |||
272 | return map; | ||
273 | } | ||
274 | |||
275 | static int map__overlap(struct map *l, struct map *r) | ||
276 | { | ||
277 | if (l->start > r->start) { | ||
278 | struct map *t = l; | ||
279 | l = r; | ||
280 | r = t; | ||
281 | } | ||
282 | |||
283 | if (l->end > r->start) | ||
284 | return 1; | ||
285 | |||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | static size_t map__fprintf(struct map *self, FILE *fp) | ||
290 | { | ||
291 | return fprintf(fp, " %Lx-%Lx %Lx %s\n", | ||
292 | self->start, self->end, self->pgoff, self->dso->name); | ||
293 | } | ||
294 | |||
295 | |||
296 | struct thread { | ||
297 | struct rb_node rb_node; | ||
298 | struct list_head maps; | ||
299 | pid_t pid; | ||
300 | char *comm; | ||
301 | }; | ||
302 | |||
303 | static struct thread *thread__new(pid_t pid) | ||
304 | { | ||
305 | struct thread *self = malloc(sizeof(*self)); | ||
306 | |||
307 | if (self != NULL) { | ||
308 | self->pid = pid; | ||
309 | self->comm = malloc(32); | ||
310 | if (self->comm) | ||
311 | snprintf(self->comm, 32, ":%d", self->pid); | ||
312 | INIT_LIST_HEAD(&self->maps); | ||
313 | } | ||
314 | |||
315 | return self; | ||
316 | } | ||
317 | |||
318 | static int thread__set_comm(struct thread *self, const char *comm) | ||
319 | { | ||
320 | if (self->comm) | ||
321 | free(self->comm); | ||
322 | self->comm = strdup(comm); | ||
323 | return self->comm ? 0 : -ENOMEM; | ||
324 | } | ||
325 | |||
326 | static size_t thread__fprintf(struct thread *self, FILE *fp) | ||
327 | { | ||
328 | struct map *pos; | ||
329 | size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); | ||
330 | |||
331 | list_for_each_entry(pos, &self->maps, node) | ||
332 | ret += map__fprintf(pos, fp); | ||
333 | |||
334 | return ret; | ||
335 | } | ||
336 | |||
337 | |||
338 | static struct rb_root threads; | ||
339 | static struct thread *last_match; | ||
340 | |||
341 | static struct thread *threads__findnew(pid_t pid) | ||
342 | { | ||
343 | struct rb_node **p = &threads.rb_node; | ||
344 | struct rb_node *parent = NULL; | ||
345 | struct thread *th; | ||
346 | |||
347 | /* | ||
348 | * Font-end cache - PID lookups come in blocks, | ||
349 | * so most of the time we dont have to look up | ||
350 | * the full rbtree: | ||
351 | */ | ||
352 | if (last_match && last_match->pid == pid) | ||
353 | return last_match; | ||
354 | |||
355 | while (*p != NULL) { | ||
356 | parent = *p; | ||
357 | th = rb_entry(parent, struct thread, rb_node); | ||
358 | |||
359 | if (th->pid == pid) { | ||
360 | last_match = th; | ||
361 | return th; | ||
362 | } | ||
363 | |||
364 | if (pid < th->pid) | ||
365 | p = &(*p)->rb_left; | ||
366 | else | ||
367 | p = &(*p)->rb_right; | ||
368 | } | ||
369 | |||
370 | th = thread__new(pid); | ||
371 | if (th != NULL) { | ||
372 | rb_link_node(&th->rb_node, parent, p); | ||
373 | rb_insert_color(&th->rb_node, &threads); | ||
374 | last_match = th; | ||
375 | } | ||
376 | |||
377 | return th; | ||
378 | } | ||
379 | |||
380 | static void thread__insert_map(struct thread *self, struct map *map) | ||
381 | { | ||
382 | struct map *pos, *tmp; | ||
383 | |||
384 | list_for_each_entry_safe(pos, tmp, &self->maps, node) { | ||
385 | if (map__overlap(pos, map)) { | ||
386 | list_del_init(&pos->node); | ||
387 | /* XXX leaks dsos */ | ||
388 | free(pos); | ||
389 | } | ||
390 | } | ||
391 | |||
392 | list_add_tail(&map->node, &self->maps); | ||
393 | } | ||
394 | |||
395 | static int thread__fork(struct thread *self, struct thread *parent) | ||
396 | { | ||
397 | struct map *map; | ||
398 | |||
399 | if (self->comm) | ||
400 | free(self->comm); | ||
401 | self->comm = strdup(parent->comm); | ||
402 | if (!self->comm) | ||
403 | return -ENOMEM; | ||
404 | |||
405 | list_for_each_entry(map, &parent->maps, node) { | ||
406 | struct map *new = map__clone(map); | ||
407 | if (!new) | ||
408 | return -ENOMEM; | ||
409 | thread__insert_map(self, new); | ||
410 | } | ||
411 | |||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | static struct map *thread__find_map(struct thread *self, __u64 ip) | ||
416 | { | ||
417 | struct map *pos; | ||
418 | |||
419 | if (self == NULL) | ||
420 | return NULL; | ||
421 | |||
422 | list_for_each_entry(pos, &self->maps, node) | ||
423 | if (ip >= pos->start && ip <= pos->end) | ||
424 | return pos; | ||
425 | |||
426 | return NULL; | ||
427 | } | ||
428 | |||
429 | static size_t threads__fprintf(FILE *fp) | ||
430 | { | ||
431 | size_t ret = 0; | ||
432 | struct rb_node *nd; | ||
433 | |||
434 | for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { | ||
435 | struct thread *pos = rb_entry(nd, struct thread, rb_node); | ||
436 | |||
437 | ret += thread__fprintf(pos, fp); | ||
438 | } | ||
439 | |||
440 | return ret; | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * histogram, sorted on item, collects counts | ||
445 | */ | ||
446 | |||
447 | static struct rb_root hist; | ||
448 | |||
449 | struct hist_entry { | ||
450 | struct rb_node rb_node; | ||
451 | |||
452 | struct thread *thread; | ||
453 | struct map *map; | ||
454 | struct dso *dso; | ||
455 | struct symbol *sym; | ||
456 | __u64 ip; | ||
457 | char level; | ||
458 | |||
459 | __u64 count; | ||
460 | }; | ||
461 | |||
462 | /* | ||
463 | * configurable sorting bits | ||
464 | */ | ||
465 | |||
466 | struct sort_entry { | ||
467 | struct list_head list; | ||
468 | |||
469 | char *header; | ||
470 | |||
471 | int64_t (*cmp)(struct hist_entry *, struct hist_entry *); | ||
472 | int64_t (*collapse)(struct hist_entry *, struct hist_entry *); | ||
473 | size_t (*print)(FILE *fp, struct hist_entry *); | ||
474 | }; | ||
475 | |||
476 | /* --sort pid */ | ||
477 | |||
478 | static int64_t | ||
479 | sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) | ||
480 | { | ||
481 | return right->thread->pid - left->thread->pid; | ||
482 | } | ||
483 | |||
484 | static size_t | ||
485 | sort__thread_print(FILE *fp, struct hist_entry *self) | ||
486 | { | ||
487 | return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); | ||
488 | } | ||
489 | |||
490 | static struct sort_entry sort_thread = { | ||
491 | .header = " Command: Pid", | ||
492 | .cmp = sort__thread_cmp, | ||
493 | .print = sort__thread_print, | ||
494 | }; | ||
495 | |||
496 | /* --sort comm */ | ||
497 | |||
498 | static int64_t | ||
499 | sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) | ||
500 | { | ||
501 | return right->thread->pid - left->thread->pid; | ||
502 | } | ||
503 | |||
504 | static int64_t | ||
505 | sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) | ||
506 | { | ||
507 | char *comm_l = left->thread->comm; | ||
508 | char *comm_r = right->thread->comm; | ||
509 | |||
510 | if (!comm_l || !comm_r) { | ||
511 | if (!comm_l && !comm_r) | ||
512 | return 0; | ||
513 | else if (!comm_l) | ||
514 | return -1; | ||
515 | else | ||
516 | return 1; | ||
517 | } | ||
518 | |||
519 | return strcmp(comm_l, comm_r); | ||
520 | } | ||
521 | |||
522 | static size_t | ||
523 | sort__comm_print(FILE *fp, struct hist_entry *self) | ||
524 | { | ||
525 | return fprintf(fp, "%16s", self->thread->comm); | ||
526 | } | ||
527 | |||
528 | static struct sort_entry sort_comm = { | ||
529 | .header = " Command", | ||
530 | .cmp = sort__comm_cmp, | ||
531 | .collapse = sort__comm_collapse, | ||
532 | .print = sort__comm_print, | ||
533 | }; | ||
534 | |||
535 | /* --sort dso */ | ||
536 | |||
537 | static int64_t | ||
538 | sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) | ||
539 | { | ||
540 | struct dso *dso_l = left->dso; | ||
541 | struct dso *dso_r = right->dso; | ||
542 | |||
543 | if (!dso_l || !dso_r) { | ||
544 | if (!dso_l && !dso_r) | ||
545 | return 0; | ||
546 | else if (!dso_l) | ||
547 | return -1; | ||
548 | else | ||
549 | return 1; | ||
550 | } | ||
551 | |||
552 | return strcmp(dso_l->name, dso_r->name); | ||
553 | } | ||
554 | |||
555 | static size_t | ||
556 | sort__dso_print(FILE *fp, struct hist_entry *self) | ||
557 | { | ||
558 | if (self->dso) | ||
559 | return fprintf(fp, "%-25s", self->dso->name); | ||
560 | |||
561 | return fprintf(fp, "%016llx ", (__u64)self->ip); | ||
562 | } | ||
563 | |||
564 | static struct sort_entry sort_dso = { | ||
565 | .header = "Shared Object ", | ||
566 | .cmp = sort__dso_cmp, | ||
567 | .print = sort__dso_print, | ||
568 | }; | ||
569 | |||
570 | /* --sort symbol */ | ||
571 | |||
572 | static int64_t | ||
573 | sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) | ||
574 | { | ||
575 | __u64 ip_l, ip_r; | ||
576 | |||
577 | if (left->sym == right->sym) | ||
578 | return 0; | ||
579 | |||
580 | ip_l = left->sym ? left->sym->start : left->ip; | ||
581 | ip_r = right->sym ? right->sym->start : right->ip; | ||
582 | |||
583 | return (int64_t)(ip_r - ip_l); | ||
584 | } | ||
585 | |||
586 | static size_t | ||
587 | sort__sym_print(FILE *fp, struct hist_entry *self) | ||
588 | { | ||
589 | size_t ret = 0; | ||
590 | |||
591 | if (verbose) | ||
592 | ret += fprintf(fp, "%#018llx ", (__u64)self->ip); | ||
593 | |||
594 | if (self->sym) { | ||
595 | ret += fprintf(fp, "[%c] %s", | ||
596 | self->dso == kernel_dso ? 'k' : '.', self->sym->name); | ||
597 | } else { | ||
598 | ret += fprintf(fp, "%#016llx", (__u64)self->ip); | ||
599 | } | ||
600 | |||
601 | return ret; | ||
602 | } | ||
603 | |||
604 | static struct sort_entry sort_sym = { | ||
605 | .header = "Symbol", | ||
606 | .cmp = sort__sym_cmp, | ||
607 | .print = sort__sym_print, | ||
608 | }; | ||
609 | |||
610 | static int sort__need_collapse = 0; | ||
611 | |||
612 | struct sort_dimension { | ||
613 | char *name; | ||
614 | struct sort_entry *entry; | ||
615 | int taken; | ||
616 | }; | ||
617 | |||
618 | static struct sort_dimension sort_dimensions[] = { | ||
619 | { .name = "pid", .entry = &sort_thread, }, | ||
620 | { .name = "comm", .entry = &sort_comm, }, | ||
621 | { .name = "dso", .entry = &sort_dso, }, | ||
622 | { .name = "symbol", .entry = &sort_sym, }, | ||
623 | }; | ||
624 | |||
625 | static LIST_HEAD(hist_entry__sort_list); | ||
626 | |||
627 | static int sort_dimension__add(char *tok) | ||
628 | { | ||
629 | int i; | ||
630 | |||
631 | for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { | ||
632 | struct sort_dimension *sd = &sort_dimensions[i]; | ||
633 | |||
634 | if (sd->taken) | ||
635 | continue; | ||
636 | |||
637 | if (strncasecmp(tok, sd->name, strlen(tok))) | ||
638 | continue; | ||
639 | |||
640 | if (sd->entry->collapse) | ||
641 | sort__need_collapse = 1; | ||
642 | |||
643 | list_add_tail(&sd->entry->list, &hist_entry__sort_list); | ||
644 | sd->taken = 1; | ||
645 | |||
646 | return 0; | ||
647 | } | ||
648 | |||
649 | return -ESRCH; | ||
650 | } | ||
651 | |||
652 | static int64_t | ||
653 | hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) | ||
654 | { | ||
655 | struct sort_entry *se; | ||
656 | int64_t cmp = 0; | ||
657 | |||
658 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
659 | cmp = se->cmp(left, right); | ||
660 | if (cmp) | ||
661 | break; | ||
662 | } | ||
663 | |||
664 | return cmp; | ||
665 | } | ||
666 | |||
667 | static int64_t | ||
668 | hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) | ||
669 | { | ||
670 | struct sort_entry *se; | ||
671 | int64_t cmp = 0; | ||
672 | |||
673 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
674 | int64_t (*f)(struct hist_entry *, struct hist_entry *); | ||
675 | |||
676 | f = se->collapse ?: se->cmp; | ||
677 | |||
678 | cmp = f(left, right); | ||
679 | if (cmp) | ||
680 | break; | ||
681 | } | ||
682 | |||
683 | return cmp; | ||
684 | } | ||
685 | |||
686 | static size_t | ||
687 | hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) | ||
688 | { | ||
689 | struct sort_entry *se; | ||
690 | size_t ret; | ||
691 | |||
692 | if (total_samples) { | ||
693 | double percent = self->count * 100.0 / total_samples; | ||
694 | char *color = PERF_COLOR_NORMAL; | ||
695 | |||
696 | /* | ||
697 | * We color high-overhead entries in red, mid-overhead | ||
698 | * entries in green - and keep the low overhead places | ||
699 | * normal: | ||
700 | */ | ||
701 | if (percent >= 5.0) { | ||
702 | color = PERF_COLOR_RED; | ||
703 | } else { | ||
704 | if (percent >= 0.5) | ||
705 | color = PERF_COLOR_GREEN; | ||
706 | } | ||
707 | |||
708 | ret = color_fprintf(fp, color, " %6.2f%%", | ||
709 | (self->count * 100.0) / total_samples); | ||
710 | } else | ||
711 | ret = fprintf(fp, "%12Ld ", self->count); | ||
712 | |||
713 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
714 | fprintf(fp, " "); | ||
715 | ret += se->print(fp, self); | ||
716 | } | ||
717 | |||
718 | ret += fprintf(fp, "\n"); | ||
719 | |||
720 | return ret; | ||
721 | } | ||
722 | |||
723 | /* | ||
724 | * collect histogram counts | ||
725 | */ | ||
726 | |||
727 | static int | ||
728 | hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, | ||
729 | struct symbol *sym, __u64 ip, char level, __u64 count) | ||
730 | { | ||
731 | struct rb_node **p = &hist.rb_node; | ||
732 | struct rb_node *parent = NULL; | ||
733 | struct hist_entry *he; | ||
734 | struct hist_entry entry = { | ||
735 | .thread = thread, | ||
736 | .map = map, | ||
737 | .dso = dso, | ||
738 | .sym = sym, | ||
739 | .ip = ip, | ||
740 | .level = level, | ||
741 | .count = count, | ||
742 | }; | ||
743 | int cmp; | ||
744 | |||
745 | while (*p != NULL) { | ||
746 | parent = *p; | ||
747 | he = rb_entry(parent, struct hist_entry, rb_node); | ||
748 | |||
749 | cmp = hist_entry__cmp(&entry, he); | ||
750 | |||
751 | if (!cmp) { | ||
752 | he->count += count; | ||
753 | return 0; | ||
754 | } | ||
755 | |||
756 | if (cmp < 0) | ||
757 | p = &(*p)->rb_left; | ||
758 | else | ||
759 | p = &(*p)->rb_right; | ||
760 | } | ||
761 | |||
762 | he = malloc(sizeof(*he)); | ||
763 | if (!he) | ||
764 | return -ENOMEM; | ||
765 | *he = entry; | ||
766 | rb_link_node(&he->rb_node, parent, p); | ||
767 | rb_insert_color(&he->rb_node, &hist); | ||
768 | |||
769 | return 0; | ||
770 | } | ||
771 | |||
/* Release a histogram entry; it is handed straight to free(). */
static void hist_entry__free(struct hist_entry *he)
{
	free(he);
}
776 | |||
777 | /* | ||
778 | * collapse the histogram | ||
779 | */ | ||
780 | |||
781 | static struct rb_root collapse_hists; | ||
782 | |||
783 | static void collapse__insert_entry(struct hist_entry *he) | ||
784 | { | ||
785 | struct rb_node **p = &collapse_hists.rb_node; | ||
786 | struct rb_node *parent = NULL; | ||
787 | struct hist_entry *iter; | ||
788 | int64_t cmp; | ||
789 | |||
790 | while (*p != NULL) { | ||
791 | parent = *p; | ||
792 | iter = rb_entry(parent, struct hist_entry, rb_node); | ||
793 | |||
794 | cmp = hist_entry__collapse(iter, he); | ||
795 | |||
796 | if (!cmp) { | ||
797 | iter->count += he->count; | ||
798 | hist_entry__free(he); | ||
799 | return; | ||
800 | } | ||
801 | |||
802 | if (cmp < 0) | ||
803 | p = &(*p)->rb_left; | ||
804 | else | ||
805 | p = &(*p)->rb_right; | ||
806 | } | ||
807 | |||
808 | rb_link_node(&he->rb_node, parent, p); | ||
809 | rb_insert_color(&he->rb_node, &collapse_hists); | ||
810 | } | ||
811 | |||
812 | static void collapse__resort(void) | ||
813 | { | ||
814 | struct rb_node *next; | ||
815 | struct hist_entry *n; | ||
816 | |||
817 | if (!sort__need_collapse) | ||
818 | return; | ||
819 | |||
820 | next = rb_first(&hist); | ||
821 | while (next) { | ||
822 | n = rb_entry(next, struct hist_entry, rb_node); | ||
823 | next = rb_next(&n->rb_node); | ||
824 | |||
825 | rb_erase(&n->rb_node, &hist); | ||
826 | collapse__insert_entry(n); | ||
827 | } | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * reverse the map, sort on count. | ||
832 | */ | ||
833 | |||
834 | static struct rb_root output_hists; | ||
835 | |||
836 | static void output__insert_entry(struct hist_entry *he) | ||
837 | { | ||
838 | struct rb_node **p = &output_hists.rb_node; | ||
839 | struct rb_node *parent = NULL; | ||
840 | struct hist_entry *iter; | ||
841 | |||
842 | while (*p != NULL) { | ||
843 | parent = *p; | ||
844 | iter = rb_entry(parent, struct hist_entry, rb_node); | ||
845 | |||
846 | if (he->count > iter->count) | ||
847 | p = &(*p)->rb_left; | ||
848 | else | ||
849 | p = &(*p)->rb_right; | ||
850 | } | ||
851 | |||
852 | rb_link_node(&he->rb_node, parent, p); | ||
853 | rb_insert_color(&he->rb_node, &output_hists); | ||
854 | } | ||
855 | |||
856 | static void output__resort(void) | ||
857 | { | ||
858 | struct rb_node *next; | ||
859 | struct hist_entry *n; | ||
860 | struct rb_root *tree = &hist; | ||
861 | |||
862 | if (sort__need_collapse) | ||
863 | tree = &collapse_hists; | ||
864 | |||
865 | next = rb_first(tree); | ||
866 | |||
867 | while (next) { | ||
868 | n = rb_entry(next, struct hist_entry, rb_node); | ||
869 | next = rb_next(&n->rb_node); | ||
870 | |||
871 | rb_erase(&n->rb_node, tree); | ||
872 | output__insert_entry(n); | ||
873 | } | ||
874 | } | ||
875 | |||
876 | static size_t output__fprintf(FILE *fp, __u64 total_samples) | ||
877 | { | ||
878 | struct hist_entry *pos; | ||
879 | struct sort_entry *se; | ||
880 | struct rb_node *nd; | ||
881 | size_t ret = 0; | ||
882 | |||
883 | fprintf(fp, "\n"); | ||
884 | fprintf(fp, "#\n"); | ||
885 | fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); | ||
886 | fprintf(fp, "#\n"); | ||
887 | |||
888 | fprintf(fp, "# Overhead"); | ||
889 | list_for_each_entry(se, &hist_entry__sort_list, list) | ||
890 | fprintf(fp, " %s", se->header); | ||
891 | fprintf(fp, "\n"); | ||
892 | |||
893 | fprintf(fp, "# ........"); | ||
894 | list_for_each_entry(se, &hist_entry__sort_list, list) { | ||
895 | int i; | ||
896 | |||
897 | fprintf(fp, " "); | ||
898 | for (i = 0; i < strlen(se->header); i++) | ||
899 | fprintf(fp, "."); | ||
900 | } | ||
901 | fprintf(fp, "\n"); | ||
902 | |||
903 | fprintf(fp, "#\n"); | ||
904 | |||
905 | for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { | ||
906 | pos = rb_entry(nd, struct hist_entry, rb_node); | ||
907 | ret += hist_entry__fprintf(fp, pos, total_samples); | ||
908 | } | ||
909 | |||
910 | if (!strcmp(sort_order, default_sort_order)) { | ||
911 | fprintf(fp, "#\n"); | ||
912 | fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); | ||
913 | fprintf(fp, "#\n"); | ||
914 | } | ||
915 | fprintf(fp, "\n"); | ||
916 | |||
917 | return ret; | ||
918 | } | ||
919 | |||
/*
 * Make sure a thread entry for pid 0 exists and carries the comm "[idle]".
 * Prints a message and exits the process when that cannot be done.
 */
static void register_idle_thread(void)
{
	struct thread *idle = threads__findnew(0);

	if (idle != NULL && !thread__set_comm(idle, "[idle]"))
		return;

	fprintf(stderr, "problem inserting idle task.\n");
	exit(-1);
}
930 | |||
/* Running totals updated by the process_*_event() handlers. */
static unsigned long total;		/* sum of sample periods */
static unsigned long total_mmap;
static unsigned long total_comm;
static unsigned long total_fork;
static unsigned long total_unknown;
936 | |||
937 | static int | ||
938 | process_overflow_event(event_t *event, unsigned long offset, unsigned long head) | ||
939 | { | ||
940 | char level; | ||
941 | int show = 0; | ||
942 | struct dso *dso = NULL; | ||
943 | struct thread *thread = threads__findnew(event->ip.pid); | ||
944 | __u64 ip = event->ip.ip; | ||
945 | __u64 period = 1; | ||
946 | struct map *map = NULL; | ||
947 | |||
948 | if (event->header.type & PERF_SAMPLE_PERIOD) | ||
949 | period = event->ip.period; | ||
950 | |||
951 | dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", | ||
952 | (void *)(offset + head), | ||
953 | (void *)(long)(event->header.size), | ||
954 | event->header.misc, | ||
955 | event->ip.pid, | ||
956 | (void *)(long)ip, | ||
957 | (long long)period); | ||
958 | |||
959 | dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); | ||
960 | |||
961 | if (thread == NULL) { | ||
962 | fprintf(stderr, "problem processing %d event, skipping it.\n", | ||
963 | event->header.type); | ||
964 | return -1; | ||
965 | } | ||
966 | |||
967 | if (event->header.misc & PERF_EVENT_MISC_KERNEL) { | ||
968 | show = SHOW_KERNEL; | ||
969 | level = 'k'; | ||
970 | |||
971 | dso = kernel_dso; | ||
972 | |||
973 | dprintf(" ...... dso: %s\n", dso->name); | ||
974 | |||
975 | } else if (event->header.misc & PERF_EVENT_MISC_USER) { | ||
976 | |||
977 | show = SHOW_USER; | ||
978 | level = '.'; | ||
979 | |||
980 | map = thread__find_map(thread, ip); | ||
981 | if (map != NULL) { | ||
982 | ip = map->map_ip(map, ip); | ||
983 | dso = map->dso; | ||
984 | } else { | ||
985 | /* | ||
986 | * If this is outside of all known maps, | ||
987 | * and is a negative address, try to look it | ||
988 | * up in the kernel dso, as it might be a | ||
989 | * vsyscall (which executes in user-mode): | ||
990 | */ | ||
991 | if ((long long)ip < 0) | ||
992 | dso = kernel_dso; | ||
993 | } | ||
994 | dprintf(" ...... dso: %s\n", dso ? dso->name : "<not found>"); | ||
995 | |||
996 | } else { | ||
997 | show = SHOW_HV; | ||
998 | level = 'H'; | ||
999 | dprintf(" ...... dso: [hypervisor]\n"); | ||
1000 | } | ||
1001 | |||
1002 | if (show & show_mask) { | ||
1003 | struct symbol *sym = NULL; | ||
1004 | |||
1005 | if (dso) | ||
1006 | sym = dso->find_symbol(dso, ip); | ||
1007 | |||
1008 | if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { | ||
1009 | fprintf(stderr, | ||
1010 | "problem incrementing symbol count, skipping event\n"); | ||
1011 | return -1; | ||
1012 | } | ||
1013 | } | ||
1014 | total += period; | ||
1015 | |||
1016 | return 0; | ||
1017 | } | ||
1018 | |||
1019 | static int | ||
1020 | process_mmap_event(event_t *event, unsigned long offset, unsigned long head) | ||
1021 | { | ||
1022 | struct thread *thread = threads__findnew(event->mmap.pid); | ||
1023 | struct map *map = map__new(&event->mmap); | ||
1024 | |||
1025 | dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", | ||
1026 | (void *)(offset + head), | ||
1027 | (void *)(long)(event->header.size), | ||
1028 | event->mmap.pid, | ||
1029 | (void *)(long)event->mmap.start, | ||
1030 | (void *)(long)event->mmap.len, | ||
1031 | (void *)(long)event->mmap.pgoff, | ||
1032 | event->mmap.filename); | ||
1033 | |||
1034 | if (thread == NULL || map == NULL) { | ||
1035 | dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); | ||
1036 | return 0; | ||
1037 | } | ||
1038 | |||
1039 | thread__insert_map(thread, map); | ||
1040 | total_mmap++; | ||
1041 | |||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1045 | static int | ||
1046 | process_comm_event(event_t *event, unsigned long offset, unsigned long head) | ||
1047 | { | ||
1048 | struct thread *thread = threads__findnew(event->comm.pid); | ||
1049 | |||
1050 | dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", | ||
1051 | (void *)(offset + head), | ||
1052 | (void *)(long)(event->header.size), | ||
1053 | event->comm.comm, event->comm.pid); | ||
1054 | |||
1055 | if (thread == NULL || | ||
1056 | thread__set_comm(thread, event->comm.comm)) { | ||
1057 | dprintf("problem processing PERF_EVENT_COMM, skipping event.\n"); | ||
1058 | return -1; | ||
1059 | } | ||
1060 | total_comm++; | ||
1061 | |||
1062 | return 0; | ||
1063 | } | ||
1064 | |||
1065 | static int | ||
1066 | process_fork_event(event_t *event, unsigned long offset, unsigned long head) | ||
1067 | { | ||
1068 | struct thread *thread = threads__findnew(event->fork.pid); | ||
1069 | struct thread *parent = threads__findnew(event->fork.ppid); | ||
1070 | |||
1071 | dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", | ||
1072 | (void *)(offset + head), | ||
1073 | (void *)(long)(event->header.size), | ||
1074 | event->fork.pid, event->fork.ppid); | ||
1075 | |||
1076 | if (!thread || !parent || thread__fork(thread, parent)) { | ||
1077 | dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); | ||
1078 | return -1; | ||
1079 | } | ||
1080 | total_fork++; | ||
1081 | |||
1082 | return 0; | ||
1083 | } | ||
1084 | |||
1085 | static int | ||
1086 | process_period_event(event_t *event, unsigned long offset, unsigned long head) | ||
1087 | { | ||
1088 | dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", | ||
1089 | (void *)(offset + head), | ||
1090 | (void *)(long)(event->header.size), | ||
1091 | event->period.time, | ||
1092 | event->period.id, | ||
1093 | event->period.sample_period); | ||
1094 | |||
1095 | return 0; | ||
1096 | } | ||
1097 | |||
1098 | static int | ||
1099 | process_event(event_t *event, unsigned long offset, unsigned long head) | ||
1100 | { | ||
1101 | if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) | ||
1102 | return process_overflow_event(event, offset, head); | ||
1103 | |||
1104 | switch (event->header.type) { | ||
1105 | case PERF_EVENT_MMAP: | ||
1106 | return process_mmap_event(event, offset, head); | ||
1107 | |||
1108 | case PERF_EVENT_COMM: | ||
1109 | return process_comm_event(event, offset, head); | ||
1110 | |||
1111 | case PERF_EVENT_FORK: | ||
1112 | return process_fork_event(event, offset, head); | ||
1113 | |||
1114 | case PERF_EVENT_PERIOD: | ||
1115 | return process_period_event(event, offset, head); | ||
1116 | /* | ||
1117 | * We dont process them right now but they are fine: | ||
1118 | */ | ||
1119 | |||
1120 | case PERF_EVENT_THROTTLE: | ||
1121 | case PERF_EVENT_UNTHROTTLE: | ||
1122 | return 0; | ||
1123 | |||
1124 | default: | ||
1125 | return -1; | ||
1126 | } | ||
1127 | |||
1128 | return 0; | ||
1129 | } | ||
1130 | |||
1131 | static int __cmd_report(void) | ||
1132 | { | ||
1133 | int ret, rc = EXIT_FAILURE; | ||
1134 | unsigned long offset = 0; | ||
1135 | unsigned long head = 0; | ||
1136 | struct stat stat; | ||
1137 | event_t *event; | ||
1138 | uint32_t size; | ||
1139 | char *buf; | ||
1140 | |||
1141 | register_idle_thread(); | ||
1142 | |||
1143 | input = open(input_name, O_RDONLY); | ||
1144 | if (input < 0) { | ||
1145 | fprintf(stderr, " failed to open file: %s", input_name); | ||
1146 | if (!strcmp(input_name, "perf.data")) | ||
1147 | fprintf(stderr, " (try 'perf record' first)"); | ||
1148 | fprintf(stderr, "\n"); | ||
1149 | exit(-1); | ||
1150 | } | ||
1151 | |||
1152 | ret = fstat(input, &stat); | ||
1153 | if (ret < 0) { | ||
1154 | perror("failed to stat file"); | ||
1155 | exit(-1); | ||
1156 | } | ||
1157 | |||
1158 | if (!stat.st_size) { | ||
1159 | fprintf(stderr, "zero-sized file, nothing to do!\n"); | ||
1160 | exit(0); | ||
1161 | } | ||
1162 | |||
1163 | if (load_kernel() < 0) { | ||
1164 | perror("failed to load kernel symbols"); | ||
1165 | return EXIT_FAILURE; | ||
1166 | } | ||
1167 | |||
1168 | if (!full_paths) { | ||
1169 | if (getcwd(__cwd, sizeof(__cwd)) == NULL) { | ||
1170 | perror("failed to get the current directory"); | ||
1171 | return EXIT_FAILURE; | ||
1172 | } | ||
1173 | cwdlen = strlen(cwd); | ||
1174 | } else { | ||
1175 | cwd = NULL; | ||
1176 | cwdlen = 0; | ||
1177 | } | ||
1178 | remap: | ||
1179 | buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, | ||
1180 | MAP_SHARED, input, offset); | ||
1181 | if (buf == MAP_FAILED) { | ||
1182 | perror("failed to mmap file"); | ||
1183 | exit(-1); | ||
1184 | } | ||
1185 | |||
1186 | more: | ||
1187 | event = (event_t *)(buf + head); | ||
1188 | |||
1189 | size = event->header.size; | ||
1190 | if (!size) | ||
1191 | size = 8; | ||
1192 | |||
1193 | if (head + event->header.size >= page_size * mmap_window) { | ||
1194 | unsigned long shift = page_size * (head / page_size); | ||
1195 | int ret; | ||
1196 | |||
1197 | ret = munmap(buf, page_size * mmap_window); | ||
1198 | assert(ret == 0); | ||
1199 | |||
1200 | offset += shift; | ||
1201 | head -= shift; | ||
1202 | goto remap; | ||
1203 | } | ||
1204 | |||
1205 | size = event->header.size; | ||
1206 | |||
1207 | dprintf("%p [%p]: event: %d\n", | ||
1208 | (void *)(offset + head), | ||
1209 | (void *)(long)event->header.size, | ||
1210 | event->header.type); | ||
1211 | |||
1212 | if (!size || process_event(event, offset, head) < 0) { | ||
1213 | |||
1214 | dprintf("%p [%p]: skipping unknown header type: %d\n", | ||
1215 | (void *)(offset + head), | ||
1216 | (void *)(long)(event->header.size), | ||
1217 | event->header.type); | ||
1218 | |||
1219 | total_unknown++; | ||
1220 | |||
1221 | /* | ||
1222 | * assume we lost track of the stream, check alignment, and | ||
1223 | * increment a single u64 in the hope to catch on again 'soon'. | ||
1224 | */ | ||
1225 | |||
1226 | if (unlikely(head & 7)) | ||
1227 | head &= ~7ULL; | ||
1228 | |||
1229 | size = 8; | ||
1230 | } | ||
1231 | |||
1232 | head += size; | ||
1233 | |||
1234 | if (offset + head < stat.st_size) | ||
1235 | goto more; | ||
1236 | |||
1237 | rc = EXIT_SUCCESS; | ||
1238 | close(input); | ||
1239 | |||
1240 | dprintf(" IP events: %10ld\n", total); | ||
1241 | dprintf(" mmap events: %10ld\n", total_mmap); | ||
1242 | dprintf(" comm events: %10ld\n", total_comm); | ||
1243 | dprintf(" fork events: %10ld\n", total_fork); | ||
1244 | dprintf(" unknown events: %10ld\n", total_unknown); | ||
1245 | |||
1246 | if (dump_trace) | ||
1247 | return 0; | ||
1248 | |||
1249 | if (verbose >= 3) | ||
1250 | threads__fprintf(stdout); | ||
1251 | |||
1252 | if (verbose >= 2) | ||
1253 | dsos__fprintf(stdout); | ||
1254 | |||
1255 | collapse__resort(); | ||
1256 | output__resort(); | ||
1257 | output__fprintf(stdout, total); | ||
1258 | |||
1259 | return rc; | ||
1260 | } | ||
1261 | |||
1262 | static const char * const report_usage[] = { | ||
1263 | "perf report [<options>] <command>", | ||
1264 | NULL | ||
1265 | }; | ||
1266 | |||
1267 | static const struct option options[] = { | ||
1268 | OPT_STRING('i', "input", &input_name, "file", | ||
1269 | "input file name"), | ||
1270 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
1271 | "be more verbose (show symbol address, etc)"), | ||
1272 | OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, | ||
1273 | "dump raw trace in ASCII"), | ||
1274 | OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), | ||
1275 | OPT_STRING('s', "sort", &sort_order, "key[,key2...]", | ||
1276 | "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), | ||
1277 | OPT_BOOLEAN('P', "full-paths", &full_paths, | ||
1278 | "Don't shorten the pathnames taking into account the cwd"), | ||
1279 | OPT_END() | ||
1280 | }; | ||
1281 | |||
1282 | static void setup_sorting(void) | ||
1283 | { | ||
1284 | char *tmp, *tok, *str = strdup(sort_order); | ||
1285 | |||
1286 | for (tok = strtok_r(str, ", ", &tmp); | ||
1287 | tok; tok = strtok_r(NULL, ", ", &tmp)) { | ||
1288 | if (sort_dimension__add(tok) < 0) { | ||
1289 | error("Unknown --sort key: `%s'", tok); | ||
1290 | usage_with_options(report_usage, options); | ||
1291 | } | ||
1292 | } | ||
1293 | |||
1294 | free(str); | ||
1295 | } | ||
1296 | |||
1297 | int cmd_report(int argc, const char **argv, const char *prefix) | ||
1298 | { | ||
1299 | symbol__init(); | ||
1300 | |||
1301 | page_size = getpagesize(); | ||
1302 | |||
1303 | argc = parse_options(argc, argv, options, report_usage, 0); | ||
1304 | |||
1305 | setup_sorting(); | ||
1306 | |||
1307 | /* | ||
1308 | * Any (unrecognized) arguments left? | ||
1309 | */ | ||
1310 | if (argc) | ||
1311 | usage_with_options(report_usage, options); | ||
1312 | |||
1313 | setup_pager(); | ||
1314 | |||
1315 | return __cmd_report(); | ||
1316 | } | ||
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c new file mode 100644 index 000000000000..c43e4a97dc42 --- /dev/null +++ b/tools/perf/builtin-stat.c | |||
@@ -0,0 +1,367 @@ | |||
1 | /* | ||
2 | * builtin-stat.c | ||
3 | * | ||
4 | * Builtin stat command: Give a precise performance counters summary | ||
5 | * overview about any workload, CPU or specific PID. | ||
6 | * | ||
7 | * Sample output: | ||
8 | |||
9 | $ perf stat ~/hackbench 10 | ||
10 | Time: 0.104 | ||
11 | |||
12 | Performance counter stats for '/home/mingo/hackbench': | ||
13 | |||
14 | 1255.538611 task clock ticks # 10.143 CPU utilization factor | ||
15 | 54011 context switches # 0.043 M/sec | ||
16 | 385 CPU migrations # 0.000 M/sec | ||
17 | 17755 pagefaults # 0.014 M/sec | ||
18 | 3808323185 CPU cycles # 3033.219 M/sec | ||
19 | 1575111190 instructions # 1254.530 M/sec | ||
20 | 17367895 cache references # 13.833 M/sec | ||
21 | 7674421 cache misses # 6.112 M/sec | ||
22 | |||
23 | Wall-clock time elapsed: 123.786620 msecs | ||
24 | |||
25 | * | ||
26 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
27 | * | ||
28 | * Improvements and fixes by: | ||
29 | * | ||
30 | * Arjan van de Ven <arjan@linux.intel.com> | ||
31 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
32 | * Wu Fengguang <fengguang.wu@intel.com> | ||
33 | * Mike Galbraith <efault@gmx.de> | ||
34 | * Paul Mackerras <paulus@samba.org> | ||
35 | * | ||
36 | * Released under the GPL v2. (and only v2, not any later version) | ||
37 | */ | ||
38 | |||
39 | #include "perf.h" | ||
40 | #include "builtin.h" | ||
41 | #include "util/util.h" | ||
42 | #include "util/parse-options.h" | ||
43 | #include "util/parse-events.h" | ||
44 | |||
45 | #include <sys/prctl.h> | ||
46 | |||
47 | static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { | ||
48 | |||
49 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, | ||
50 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, | ||
51 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, | ||
52 | { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, | ||
53 | |||
54 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, | ||
55 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, | ||
56 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, | ||
57 | { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, | ||
58 | |||
59 | }; | ||
60 | |||
61 | static int system_wide = 0; | ||
62 | static int inherit = 1; | ||
63 | static int verbose = 0; | ||
64 | |||
65 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
66 | |||
67 | static int target_pid = -1; | ||
68 | static int nr_cpus = 0; | ||
69 | static unsigned int page_size; | ||
70 | |||
71 | static int scale = 1; | ||
72 | |||
73 | static const unsigned int default_count[] = { | ||
74 | 1000000, | ||
75 | 1000000, | ||
76 | 10000, | ||
77 | 10000, | ||
78 | 1000000, | ||
79 | 10000, | ||
80 | }; | ||
81 | |||
82 | static __u64 event_res[MAX_COUNTERS][3]; | ||
83 | static __u64 event_scaled[MAX_COUNTERS]; | ||
84 | |||
85 | static __u64 runtime_nsecs; | ||
86 | static __u64 walltime_nsecs; | ||
87 | static __u64 runtime_cycles; | ||
88 | |||
89 | static void create_perf_stat_counter(int counter) | ||
90 | { | ||
91 | struct perf_counter_attr *attr = attrs + counter; | ||
92 | |||
93 | if (scale) | ||
94 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | ||
95 | PERF_FORMAT_TOTAL_TIME_RUNNING; | ||
96 | |||
97 | if (system_wide) { | ||
98 | int cpu; | ||
99 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
100 | fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); | ||
101 | if (fd[cpu][counter] < 0 && verbose) { | ||
102 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); | ||
103 | } | ||
104 | } | ||
105 | } else { | ||
106 | attr->inherit = inherit; | ||
107 | attr->disabled = 1; | ||
108 | |||
109 | fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); | ||
110 | if (fd[0][counter] < 0 && verbose) { | ||
111 | printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Does the counter have nsecs as a unit? | ||
118 | */ | ||
119 | static inline int nsec_counter(int counter) | ||
120 | { | ||
121 | if (attrs[counter].type != PERF_TYPE_SOFTWARE) | ||
122 | return 0; | ||
123 | |||
124 | if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) | ||
125 | return 1; | ||
126 | |||
127 | if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
128 | return 1; | ||
129 | |||
130 | return 0; | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Read out the results of a single counter: | ||
135 | */ | ||
136 | static void read_counter(int counter) | ||
137 | { | ||
138 | __u64 *count, single_count[3]; | ||
139 | ssize_t res; | ||
140 | int cpu, nv; | ||
141 | int scaled; | ||
142 | |||
143 | count = event_res[counter]; | ||
144 | |||
145 | count[0] = count[1] = count[2] = 0; | ||
146 | |||
147 | nv = scale ? 3 : 1; | ||
148 | for (cpu = 0; cpu < nr_cpus; cpu ++) { | ||
149 | if (fd[cpu][counter] < 0) | ||
150 | continue; | ||
151 | |||
152 | res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); | ||
153 | assert(res == nv * sizeof(__u64)); | ||
154 | |||
155 | count[0] += single_count[0]; | ||
156 | if (scale) { | ||
157 | count[1] += single_count[1]; | ||
158 | count[2] += single_count[2]; | ||
159 | } | ||
160 | } | ||
161 | |||
162 | scaled = 0; | ||
163 | if (scale) { | ||
164 | if (count[2] == 0) { | ||
165 | event_scaled[counter] = -1; | ||
166 | count[0] = 0; | ||
167 | return; | ||
168 | } | ||
169 | |||
170 | if (count[2] < count[1]) { | ||
171 | event_scaled[counter] = 1; | ||
172 | count[0] = (unsigned long long) | ||
173 | ((double)count[0] * count[1] / count[2] + 0.5); | ||
174 | } | ||
175 | } | ||
176 | /* | ||
177 | * Save the full runtime - to allow normalization during printout: | ||
178 | */ | ||
179 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
180 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) | ||
181 | runtime_nsecs = count[0]; | ||
182 | if (attrs[counter].type == PERF_TYPE_HARDWARE && | ||
183 | attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) | ||
184 | runtime_cycles = count[0]; | ||
185 | } | ||
186 | |||
187 | /* | ||
188 | * Print out the results of a single counter: | ||
189 | */ | ||
190 | static void print_counter(int counter) | ||
191 | { | ||
192 | __u64 *count; | ||
193 | int scaled; | ||
194 | |||
195 | count = event_res[counter]; | ||
196 | scaled = event_scaled[counter]; | ||
197 | |||
198 | if (scaled == -1) { | ||
199 | fprintf(stderr, " %14s %-20s\n", | ||
200 | "<not counted>", event_name(counter)); | ||
201 | return; | ||
202 | } | ||
203 | |||
204 | if (nsec_counter(counter)) { | ||
205 | double msecs = (double)count[0] / 1000000; | ||
206 | |||
207 | fprintf(stderr, " %14.6f %-20s", | ||
208 | msecs, event_name(counter)); | ||
209 | if (attrs[counter].type == PERF_TYPE_SOFTWARE && | ||
210 | attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { | ||
211 | |||
212 | if (walltime_nsecs) | ||
213 | fprintf(stderr, " # %11.3f CPU utilization factor", | ||
214 | (double)count[0] / (double)walltime_nsecs); | ||
215 | } | ||
216 | } else { | ||
217 | fprintf(stderr, " %14Ld %-20s", | ||
218 | count[0], event_name(counter)); | ||
219 | if (runtime_nsecs) | ||
220 | fprintf(stderr, " # %11.3f M/sec", | ||
221 | (double)count[0]/runtime_nsecs*1000.0); | ||
222 | if (runtime_cycles && | ||
223 | attrs[counter].type == PERF_TYPE_HARDWARE && | ||
224 | attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { | ||
225 | |||
226 | fprintf(stderr, " # %1.3f per cycle", | ||
227 | (double)count[0] / (double)runtime_cycles); | ||
228 | } | ||
229 | } | ||
230 | if (scaled) | ||
231 | fprintf(stderr, " (scaled from %.2f%%)", | ||
232 | (double) count[2] / count[1] * 100); | ||
233 | fprintf(stderr, "\n"); | ||
234 | } | ||
235 | |||
236 | static int do_perf_stat(int argc, const char **argv) | ||
237 | { | ||
238 | unsigned long long t0, t1; | ||
239 | int counter; | ||
240 | int status; | ||
241 | int pid; | ||
242 | int i; | ||
243 | |||
244 | if (!system_wide) | ||
245 | nr_cpus = 1; | ||
246 | |||
247 | for (counter = 0; counter < nr_counters; counter++) | ||
248 | create_perf_stat_counter(counter); | ||
249 | |||
250 | /* | ||
251 | * Enable counters and exec the command: | ||
252 | */ | ||
253 | t0 = rdclock(); | ||
254 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
255 | |||
256 | if ((pid = fork()) < 0) | ||
257 | perror("failed to fork"); | ||
258 | |||
259 | if (!pid) { | ||
260 | if (execvp(argv[0], (char **)argv)) { | ||
261 | perror(argv[0]); | ||
262 | exit(-1); | ||
263 | } | ||
264 | } | ||
265 | |||
266 | while (wait(&status) >= 0) | ||
267 | ; | ||
268 | |||
269 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
270 | t1 = rdclock(); | ||
271 | |||
272 | walltime_nsecs = t1 - t0; | ||
273 | |||
274 | fflush(stdout); | ||
275 | |||
276 | fprintf(stderr, "\n"); | ||
277 | fprintf(stderr, " Performance counter stats for \'%s", argv[0]); | ||
278 | |||
279 | for (i = 1; i < argc; i++) | ||
280 | fprintf(stderr, " %s", argv[i]); | ||
281 | |||
282 | fprintf(stderr, "\':\n"); | ||
283 | fprintf(stderr, "\n"); | ||
284 | |||
285 | for (counter = 0; counter < nr_counters; counter++) | ||
286 | read_counter(counter); | ||
287 | |||
288 | for (counter = 0; counter < nr_counters; counter++) | ||
289 | print_counter(counter); | ||
290 | |||
291 | |||
292 | fprintf(stderr, "\n"); | ||
293 | fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", | ||
294 | (double)(t1-t0)/1e6); | ||
295 | fprintf(stderr, "\n"); | ||
296 | |||
297 | return 0; | ||
298 | } | ||
299 | |||
/* Number of the last signal seen by skip_signal(), -1 when none was. */
static volatile int signr = -1;

/* Signal handler: only remember which signal arrived. */
static void skip_signal(int signo)
{
	signr = signo;
}
306 | |||
307 | static void sig_atexit(void) | ||
308 | { | ||
309 | if (signr == -1) | ||
310 | return; | ||
311 | |||
312 | signal(signr, SIG_DFL); | ||
313 | kill(getpid(), signr); | ||
314 | } | ||
315 | |||
316 | static const char * const stat_usage[] = { | ||
317 | "perf stat [<options>] <command>", | ||
318 | NULL | ||
319 | }; | ||
320 | |||
321 | static const struct option options[] = { | ||
322 | OPT_CALLBACK('e', "event", NULL, "event", | ||
323 | "event selector. use 'perf list' to list available events", | ||
324 | parse_events), | ||
325 | OPT_BOOLEAN('i', "inherit", &inherit, | ||
326 | "child tasks inherit counters"), | ||
327 | OPT_INTEGER('p', "pid", &target_pid, | ||
328 | "stat events on existing pid"), | ||
329 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | ||
330 | "system-wide collection from all CPUs"), | ||
331 | OPT_BOOLEAN('S', "scale", &scale, | ||
332 | "scale/normalize counters"), | ||
333 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
334 | "be more verbose (show counter open errors, etc)"), | ||
335 | OPT_END() | ||
336 | }; | ||
337 | |||
338 | int cmd_stat(int argc, const char **argv, const char *prefix) | ||
339 | { | ||
340 | page_size = sysconf(_SC_PAGE_SIZE); | ||
341 | |||
342 | memcpy(attrs, default_attrs, sizeof(attrs)); | ||
343 | |||
344 | argc = parse_options(argc, argv, options, stat_usage, 0); | ||
345 | if (!argc) | ||
346 | usage_with_options(stat_usage, options); | ||
347 | |||
348 | if (!nr_counters) | ||
349 | nr_counters = 8; | ||
350 | |||
351 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
352 | assert(nr_cpus <= MAX_NR_CPUS); | ||
353 | assert(nr_cpus >= 0); | ||
354 | |||
355 | /* | ||
356 | * We dont want to block the signals - that would cause | ||
357 | * child tasks to inherit that and Ctrl-C would not work. | ||
358 | * What we want is for Ctrl-C to work in the exec()-ed | ||
359 | * task, but being ignored by perf stat itself: | ||
360 | */ | ||
361 | atexit(sig_atexit); | ||
362 | signal(SIGINT, skip_signal); | ||
363 | signal(SIGALRM, skip_signal); | ||
364 | signal(SIGABRT, skip_signal); | ||
365 | |||
366 | return do_perf_stat(argc, argv); | ||
367 | } | ||
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c new file mode 100644 index 000000000000..fe338d3c5d7e --- /dev/null +++ b/tools/perf/builtin-top.c | |||
@@ -0,0 +1,736 @@ | |||
1 | /* | ||
2 | * builtin-top.c | ||
3 | * | ||
4 | * Builtin top command: Display a continuously updated profile of | ||
5 | * any workload, CPU or specific PID. | ||
6 | * | ||
7 | * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com> | ||
8 | * | ||
9 | * Improvements and fixes by: | ||
10 | * | ||
11 | * Arjan van de Ven <arjan@linux.intel.com> | ||
12 | * Yanmin Zhang <yanmin.zhang@intel.com> | ||
13 | * Wu Fengguang <fengguang.wu@intel.com> | ||
14 | * Mike Galbraith <efault@gmx.de> | ||
15 | * Paul Mackerras <paulus@samba.org> | ||
16 | * | ||
17 | * Released under the GPL v2. (and only v2, not any later version) | ||
18 | */ | ||
19 | #include "builtin.h" | ||
20 | |||
21 | #include "perf.h" | ||
22 | |||
23 | #include "util/symbol.h" | ||
24 | #include "util/color.h" | ||
25 | #include "util/util.h" | ||
26 | #include "util/rbtree.h" | ||
27 | #include "util/parse-options.h" | ||
28 | #include "util/parse-events.h" | ||
29 | |||
30 | #include <assert.h> | ||
31 | #include <fcntl.h> | ||
32 | |||
33 | #include <stdio.h> | ||
34 | |||
35 | #include <errno.h> | ||
36 | #include <time.h> | ||
37 | #include <sched.h> | ||
38 | #include <pthread.h> | ||
39 | |||
40 | #include <sys/syscall.h> | ||
41 | #include <sys/ioctl.h> | ||
42 | #include <sys/poll.h> | ||
43 | #include <sys/prctl.h> | ||
44 | #include <sys/wait.h> | ||
45 | #include <sys/uio.h> | ||
46 | #include <sys/mman.h> | ||
47 | |||
48 | #include <linux/unistd.h> | ||
49 | #include <linux/types.h> | ||
50 | |||
51 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | ||
52 | |||
53 | static int system_wide = 0; | ||
54 | |||
55 | static int default_interval = 100000; | ||
56 | |||
57 | static __u64 count_filter = 5; | ||
58 | static int print_entries = 15; | ||
59 | |||
60 | static int target_pid = -1; | ||
61 | static int profile_cpu = -1; | ||
62 | static int nr_cpus = 0; | ||
63 | static unsigned int realtime_prio = 0; | ||
64 | static int group = 0; | ||
65 | static unsigned int page_size; | ||
66 | static unsigned int mmap_pages = 16; | ||
67 | static int freq = 0; | ||
68 | static int verbose = 0; | ||
69 | |||
70 | static char *sym_filter; | ||
71 | static unsigned long filter_start; | ||
72 | static unsigned long filter_end; | ||
73 | |||
74 | static int delay_secs = 2; | ||
75 | static int zero; | ||
76 | static int dump_symtab; | ||
77 | |||
78 | /* | ||
79 | * Symbols | ||
80 | */ | ||
81 | |||
82 | static __u64 min_ip; | ||
83 | static __u64 max_ip = -1ll; | ||
84 | |||
/*
 * Per-symbol profiling state.  parse_symbols() asks dso__new() to reserve
 * sizeof(struct sym_entry) of private data per symbol, and
 * print_sym_table() reaches the matching struct symbol at (syme + 1).
 */
struct sym_entry {
	struct rb_node		rb_node;	/* link in the temporary weight-sorted tree built per refresh */
	struct list_head	node;		/* link in the active_symbols list */
	unsigned long		count[MAX_COUNTERS];	/* hits per counter index, decayed on each refresh */
	unsigned long		snap_count;	/* copy of count[0] taken at refresh time */
	double			weight;		/* sort key computed by sym_weight() */
	int			skip;		/* set by symbol_filter() for idle routines; record_ip() ignores them */
};
93 | |||
94 | struct sym_entry *sym_filter_entry; | ||
95 | |||
96 | struct dso *kernel_dso; | ||
97 | |||
98 | /* | ||
99 | * Symbols will be added here in record_ip and will get out | ||
100 | * after decayed. | ||
101 | */ | ||
102 | static LIST_HEAD(active_symbols); | ||
103 | static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; | ||
104 | |||
105 | /* | ||
106 | * Ordering weight: count-1 * count-2 * ... / count-n | ||
107 | */ | ||
108 | static double sym_weight(const struct sym_entry *sym) | ||
109 | { | ||
110 | double weight = sym->snap_count; | ||
111 | int counter; | ||
112 | |||
113 | for (counter = 1; counter < nr_counters-1; counter++) | ||
114 | weight *= sym->count[counter]; | ||
115 | |||
116 | weight /= (sym->count[counter] + 1); | ||
117 | |||
118 | return weight; | ||
119 | } | ||
120 | |||
121 | static long samples; | ||
122 | static long userspace_samples; | ||
/*
 * ANSI sequence "cursor home" + "erase display".  The ESC bytes must be
 * written as escapes; without them the bracket text is printed literally.
 */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
124 | |||
/*
 * Link @syme into the active_symbols list.  Takes no lock itself; the
 * caller in this file (record_ip) holds active_symbols_lock around it.
 */
static void __list_insert_active_sym(struct sym_entry *syme)
{
	list_add(&syme->node, &active_symbols);
}
129 | |||
/*
 * Unlink @syme from the active_symbols list under active_symbols_lock.
 * list_del_init() is used, so the node can later be tested with
 * list_empty() and re-inserted (see record_ip).
 */
static void list_remove_active_sym(struct sym_entry *syme)
{
	pthread_mutex_lock(&active_symbols_lock);
	list_del_init(&syme->node);
	pthread_mutex_unlock(&active_symbols_lock);
}
136 | |||
137 | static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) | ||
138 | { | ||
139 | struct rb_node **p = &tree->rb_node; | ||
140 | struct rb_node *parent = NULL; | ||
141 | struct sym_entry *iter; | ||
142 | |||
143 | while (*p != NULL) { | ||
144 | parent = *p; | ||
145 | iter = rb_entry(parent, struct sym_entry, rb_node); | ||
146 | |||
147 | if (se->weight > iter->weight) | ||
148 | p = &(*p)->rb_left; | ||
149 | else | ||
150 | p = &(*p)->rb_right; | ||
151 | } | ||
152 | |||
153 | rb_link_node(&se->rb_node, parent, p); | ||
154 | rb_insert_color(&se->rb_node, tree); | ||
155 | } | ||
156 | |||
157 | static void print_sym_table(void) | ||
158 | { | ||
159 | int printed = 0, j; | ||
160 | int counter; | ||
161 | float samples_per_sec = samples/delay_secs; | ||
162 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; | ||
163 | float sum_ksamples = 0.0; | ||
164 | struct sym_entry *syme, *n; | ||
165 | struct rb_root tmp = RB_ROOT; | ||
166 | struct rb_node *nd; | ||
167 | |||
168 | samples = userspace_samples = 0; | ||
169 | |||
170 | /* Sort the active symbols */ | ||
171 | pthread_mutex_lock(&active_symbols_lock); | ||
172 | syme = list_entry(active_symbols.next, struct sym_entry, node); | ||
173 | pthread_mutex_unlock(&active_symbols_lock); | ||
174 | |||
175 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { | ||
176 | syme->snap_count = syme->count[0]; | ||
177 | if (syme->snap_count != 0) { | ||
178 | syme->weight = sym_weight(syme); | ||
179 | rb_insert_active_sym(&tmp, syme); | ||
180 | sum_ksamples += syme->snap_count; | ||
181 | |||
182 | for (j = 0; j < nr_counters; j++) | ||
183 | syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; | ||
184 | } else | ||
185 | list_remove_active_sym(syme); | ||
186 | } | ||
187 | |||
188 | puts(CONSOLE_CLEAR); | ||
189 | |||
190 | printf( | ||
191 | "------------------------------------------------------------------------------\n"); | ||
192 | printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", | ||
193 | samples_per_sec, | ||
194 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); | ||
195 | |||
196 | if (nr_counters == 1) { | ||
197 | printf("%Ld", attrs[0].sample_period); | ||
198 | if (freq) | ||
199 | printf("Hz "); | ||
200 | else | ||
201 | printf(" "); | ||
202 | } | ||
203 | |||
204 | for (counter = 0; counter < nr_counters; counter++) { | ||
205 | if (counter) | ||
206 | printf("/"); | ||
207 | |||
208 | printf("%s", event_name(counter)); | ||
209 | } | ||
210 | |||
211 | printf( "], "); | ||
212 | |||
213 | if (target_pid != -1) | ||
214 | printf(" (target_pid: %d", target_pid); | ||
215 | else | ||
216 | printf(" (all"); | ||
217 | |||
218 | if (profile_cpu != -1) | ||
219 | printf(", cpu: %d)\n", profile_cpu); | ||
220 | else { | ||
221 | if (target_pid != -1) | ||
222 | printf(")\n"); | ||
223 | else | ||
224 | printf(", %d CPUs)\n", nr_cpus); | ||
225 | } | ||
226 | |||
227 | printf("------------------------------------------------------------------------------\n\n"); | ||
228 | |||
229 | if (nr_counters == 1) | ||
230 | printf(" samples pcnt"); | ||
231 | else | ||
232 | printf(" weight samples pcnt"); | ||
233 | |||
234 | printf(" RIP kernel function\n" | ||
235 | " ______ _______ _____ ________________ _______________\n\n" | ||
236 | ); | ||
237 | |||
238 | for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { | ||
239 | struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); | ||
240 | struct symbol *sym = (struct symbol *)(syme + 1); | ||
241 | char *color = PERF_COLOR_NORMAL; | ||
242 | double pcnt; | ||
243 | |||
244 | if (++printed > print_entries || syme->snap_count < count_filter) | ||
245 | continue; | ||
246 | |||
247 | pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / | ||
248 | sum_ksamples)); | ||
249 | |||
250 | /* | ||
251 | * We color high-overhead entries in red, mid-overhead | ||
252 | * entries in green - and keep the low overhead places | ||
253 | * normal: | ||
254 | */ | ||
255 | if (pcnt >= 5.0) { | ||
256 | color = PERF_COLOR_RED; | ||
257 | } else { | ||
258 | if (pcnt >= 0.5) | ||
259 | color = PERF_COLOR_GREEN; | ||
260 | } | ||
261 | |||
262 | if (nr_counters == 1) | ||
263 | printf("%20.2f - ", syme->weight); | ||
264 | else | ||
265 | printf("%9.1f %10ld - ", syme->weight, syme->snap_count); | ||
266 | |||
267 | color_fprintf(stdout, color, "%4.1f%%", pcnt); | ||
268 | printf(" - %016llx : %s\n", sym->start, sym->name); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | static void *display_thread(void *arg) | ||
273 | { | ||
274 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | ||
275 | int delay_msecs = delay_secs * 1000; | ||
276 | |||
277 | printf("PerfTop refresh period: %d seconds\n", delay_secs); | ||
278 | |||
279 | do { | ||
280 | print_sym_table(); | ||
281 | } while (!poll(&stdin_poll, 1, delay_msecs) == 1); | ||
282 | |||
283 | printf("key pressed - exiting.\n"); | ||
284 | exit(0); | ||
285 | |||
286 | return NULL; | ||
287 | } | ||
288 | |||
289 | static int symbol_filter(struct dso *self, struct symbol *sym) | ||
290 | { | ||
291 | static int filter_match; | ||
292 | struct sym_entry *syme; | ||
293 | const char *name = sym->name; | ||
294 | |||
295 | if (!strcmp(name, "_text") || | ||
296 | !strcmp(name, "_etext") || | ||
297 | !strcmp(name, "_sinittext") || | ||
298 | !strncmp("init_module", name, 11) || | ||
299 | !strncmp("cleanup_module", name, 14) || | ||
300 | strstr(name, "_text_start") || | ||
301 | strstr(name, "_text_end")) | ||
302 | return 1; | ||
303 | |||
304 | syme = dso__sym_priv(self, sym); | ||
305 | /* Tag samples to be skipped. */ | ||
306 | if (!strcmp("default_idle", name) || | ||
307 | !strcmp("cpu_idle", name) || | ||
308 | !strcmp("enter_idle", name) || | ||
309 | !strcmp("exit_idle", name) || | ||
310 | !strcmp("mwait_idle", name)) | ||
311 | syme->skip = 1; | ||
312 | |||
313 | if (filter_match == 1) { | ||
314 | filter_end = sym->start; | ||
315 | filter_match = -1; | ||
316 | if (filter_end - filter_start > 10000) { | ||
317 | fprintf(stderr, | ||
318 | "hm, too large filter symbol <%s> - skipping.\n", | ||
319 | sym_filter); | ||
320 | fprintf(stderr, "symbol filter start: %016lx\n", | ||
321 | filter_start); | ||
322 | fprintf(stderr, " end: %016lx\n", | ||
323 | filter_end); | ||
324 | filter_end = filter_start = 0; | ||
325 | sym_filter = NULL; | ||
326 | sleep(1); | ||
327 | } | ||
328 | } | ||
329 | |||
330 | if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { | ||
331 | filter_match = 1; | ||
332 | filter_start = sym->start; | ||
333 | } | ||
334 | |||
335 | |||
336 | return 0; | ||
337 | } | ||
338 | |||
339 | static int parse_symbols(void) | ||
340 | { | ||
341 | struct rb_node *node; | ||
342 | struct symbol *sym; | ||
343 | |||
344 | kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); | ||
345 | if (kernel_dso == NULL) | ||
346 | return -1; | ||
347 | |||
348 | if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0) | ||
349 | goto out_delete_dso; | ||
350 | |||
351 | node = rb_first(&kernel_dso->syms); | ||
352 | sym = rb_entry(node, struct symbol, rb_node); | ||
353 | min_ip = sym->start; | ||
354 | |||
355 | node = rb_last(&kernel_dso->syms); | ||
356 | sym = rb_entry(node, struct symbol, rb_node); | ||
357 | max_ip = sym->end; | ||
358 | |||
359 | if (dump_symtab) | ||
360 | dso__fprintf(kernel_dso, stderr); | ||
361 | |||
362 | return 0; | ||
363 | |||
364 | out_delete_dso: | ||
365 | dso__delete(kernel_dso); | ||
366 | kernel_dso = NULL; | ||
367 | return -1; | ||
368 | } | ||
369 | |||
370 | #define TRACE_COUNT 3 | ||
371 | |||
372 | /* | ||
373 | * Binary search in the histogram table and record the hit: | ||
374 | */ | ||
375 | static void record_ip(__u64 ip, int counter) | ||
376 | { | ||
377 | struct symbol *sym = dso__find_symbol(kernel_dso, ip); | ||
378 | |||
379 | if (sym != NULL) { | ||
380 | struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); | ||
381 | |||
382 | if (!syme->skip) { | ||
383 | syme->count[counter]++; | ||
384 | pthread_mutex_lock(&active_symbols_lock); | ||
385 | if (list_empty(&syme->node) || !syme->node.next) | ||
386 | __list_insert_active_sym(syme); | ||
387 | pthread_mutex_unlock(&active_symbols_lock); | ||
388 | return; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | samples--; | ||
393 | } | ||
394 | |||
395 | static void process_event(__u64 ip, int counter) | ||
396 | { | ||
397 | samples++; | ||
398 | |||
399 | if (ip < min_ip || ip > max_ip) { | ||
400 | userspace_samples++; | ||
401 | return; | ||
402 | } | ||
403 | |||
404 | record_ip(ip, counter); | ||
405 | } | ||
406 | |||
/* Reader state for one mmap()ed counter buffer (one per cpu/counter pair). */
struct mmap_data {
	int			counter;	/* counter index passed to process_event() */
	void			*base;		/* start of the mapping; event data begins one page in */
	unsigned int		mask;		/* data area size minus one, used to wrap offsets */
	unsigned int		prev;		/* position up to which events were already consumed */
};
413 | |||
/*
 * Return the current data_head from the control page at the start of
 * @md's mapping.
 */
static unsigned int mmap_read_head(struct mmap_data *md)
{
	struct perf_counter_mmap_page *pc = md->base;
	int head;

	head = pc->data_head;
	/* Read barrier after loading data_head, before the caller reads event data. */
	rmb();

	return head;
}
424 | |||
425 | struct timeval last_read, this_read; | ||
426 | |||
/*
 * Consume all events between md->prev and the current head of @md's
 * buffer, feeding every overflow event that carries an IP to
 * process_event().  md->prev is advanced to the position reached.
 */
static void mmap_read_counter(struct mmap_data *md)
{
	unsigned int head = mmap_read_head(md);
	unsigned int old = md->prev;
	/* Event data starts one page after the start of the mapping. */
	unsigned char *data = md->base + page_size;
	int diff;

	gettimeofday(&this_read, NULL);

	/*
	 * If we're further behind than half the buffer, there's a chance
	 * the writer will bite our tail and mess up the samples under us.
	 *
	 * If we somehow ended up ahead of the head, we got messed up.
	 *
	 * In either case, truncate and restart at head.
	 */
	diff = head - old;
	if (diff > md->mask / 2 || diff < 0) {
		struct timeval iv;
		unsigned long msecs;

		timersub(&this_read, &last_read, &iv);
		msecs = iv.tv_sec*1000 + iv.tv_usec/1000;

		fprintf(stderr, "WARNING: failed to keep up with mmap data."
				" Last read %lu msecs ago.\n", msecs);

		/*
		 * head points to a known good entry, start there.
		 */
		old = head;
	}

	last_read = this_read;

	/*
	 * NOTE(review): an event with header.size == 0 would leave 'old'
	 * unchanged and this loop would never terminate - confirm the
	 * producer never emits one.
	 */
	for (; old != head;) {
		struct ip_event {
			struct perf_event_header header;
			__u64 ip;
			__u32 pid, target_pid;
		};
		struct mmap_event {
			struct perf_event_header header;
			__u32 pid, target_pid;
			__u64 start;
			__u64 len;
			__u64 pgoff;
			char filename[PATH_MAX];
		};

		typedef union event_union {
			struct perf_event_header header;
			struct ip_event ip;
			struct mmap_event mmap;
		} event_t;

		event_t *event = (event_t *)&data[old & md->mask];

		event_t event_copy;

		size_t size = event->header.size;

		/*
		 * Event straddles the mmap boundary -- header should always
		 * be inside due to u64 alignment of output.
		 */
		if ((old & md->mask) + size != ((old + size) & md->mask)) {
			unsigned int offset = old;
			unsigned int len = min(sizeof(*event), size), cpy;
			void *dst = &event_copy;

			/* Copy in pieces, wrapping to the start of the data area as needed. */
			do {
				cpy = min(md->mask + 1 - (offset & md->mask), len);
				memcpy(dst, &data[offset & md->mask], cpy);
				offset += cpy;
				dst += cpy;
				len -= cpy;
			} while (len);

			event = &event_copy;
		}

		old += size;

		if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) {
			if (event->header.type & PERF_SAMPLE_IP)
				process_event(event->ip.ip, md->counter);
		}
	}

	md->prev = old;
}
520 | |||
521 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | ||
522 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | ||
523 | |||
524 | static void mmap_read(void) | ||
525 | { | ||
526 | int i, counter; | ||
527 | |||
528 | for (i = 0; i < nr_cpus; i++) { | ||
529 | for (counter = 0; counter < nr_counters; counter++) | ||
530 | mmap_read_counter(&mmap_array[i][counter]); | ||
531 | } | ||
532 | } | ||
533 | |||
534 | int nr_poll; | ||
535 | int group_fd; | ||
536 | |||
537 | static void start_counter(int i, int counter) | ||
538 | { | ||
539 | struct perf_counter_attr *attr; | ||
540 | unsigned int cpu; | ||
541 | |||
542 | cpu = profile_cpu; | ||
543 | if (target_pid == -1 && profile_cpu == -1) | ||
544 | cpu = i; | ||
545 | |||
546 | attr = attrs + counter; | ||
547 | |||
548 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | ||
549 | attr->freq = freq; | ||
550 | |||
551 | try_again: | ||
552 | fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); | ||
553 | |||
554 | if (fd[i][counter] < 0) { | ||
555 | int err = errno; | ||
556 | |||
557 | if (err == EPERM) | ||
558 | die("No permission - are you root?\n"); | ||
559 | /* | ||
560 | * If it's cycles then fall back to hrtimer | ||
561 | * based cpu-clock-tick sw counter, which | ||
562 | * is always available even if no PMU support: | ||
563 | */ | ||
564 | if (attr->type == PERF_TYPE_HARDWARE | ||
565 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | ||
566 | |||
567 | if (verbose) | ||
568 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | ||
569 | |||
570 | attr->type = PERF_TYPE_SOFTWARE; | ||
571 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | ||
572 | goto try_again; | ||
573 | } | ||
574 | printf("\n"); | ||
575 | error("perfcounter syscall returned with %d (%s)\n", | ||
576 | fd[i][counter], strerror(err)); | ||
577 | die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); | ||
578 | exit(-1); | ||
579 | } | ||
580 | assert(fd[i][counter] >= 0); | ||
581 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | ||
582 | |||
583 | /* | ||
584 | * First counter acts as the group leader: | ||
585 | */ | ||
586 | if (group && group_fd == -1) | ||
587 | group_fd = fd[i][counter]; | ||
588 | |||
589 | event_array[nr_poll].fd = fd[i][counter]; | ||
590 | event_array[nr_poll].events = POLLIN; | ||
591 | nr_poll++; | ||
592 | |||
593 | mmap_array[i][counter].counter = counter; | ||
594 | mmap_array[i][counter].prev = 0; | ||
595 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; | ||
596 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | ||
597 | PROT_READ, MAP_SHARED, fd[i][counter], 0); | ||
598 | if (mmap_array[i][counter].base == MAP_FAILED) | ||
599 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | ||
600 | } | ||
601 | |||
602 | static int __cmd_top(void) | ||
603 | { | ||
604 | pthread_t thread; | ||
605 | int i, counter; | ||
606 | int ret; | ||
607 | |||
608 | for (i = 0; i < nr_cpus; i++) { | ||
609 | group_fd = -1; | ||
610 | for (counter = 0; counter < nr_counters; counter++) | ||
611 | start_counter(i, counter); | ||
612 | } | ||
613 | |||
614 | /* Wait for a minimal set of events before starting the snapshot */ | ||
615 | poll(event_array, nr_poll, 100); | ||
616 | |||
617 | mmap_read(); | ||
618 | |||
619 | if (pthread_create(&thread, NULL, display_thread, NULL)) { | ||
620 | printf("Could not create display thread.\n"); | ||
621 | exit(-1); | ||
622 | } | ||
623 | |||
624 | if (realtime_prio) { | ||
625 | struct sched_param param; | ||
626 | |||
627 | param.sched_priority = realtime_prio; | ||
628 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | ||
629 | printf("Could not set realtime priority.\n"); | ||
630 | exit(-1); | ||
631 | } | ||
632 | } | ||
633 | |||
634 | while (1) { | ||
635 | int hits = samples; | ||
636 | |||
637 | mmap_read(); | ||
638 | |||
639 | if (hits == samples) | ||
640 | ret = poll(event_array, nr_poll, 100); | ||
641 | } | ||
642 | |||
643 | return 0; | ||
644 | } | ||
645 | |||
/* NULL-terminated usage lines for 'perf top', passed to the option parser. */
static const char * const top_usage[] = {
	"perf top [<options>]",
	NULL
};
650 | |||
651 | static const struct option options[] = { | ||
652 | OPT_CALLBACK('e', "event", NULL, "event", | ||
653 | "event selector. use 'perf list' to list available events", | ||
654 | parse_events), | ||
655 | OPT_INTEGER('c', "count", &default_interval, | ||
656 | "event period to sample"), | ||
657 | OPT_INTEGER('p', "pid", &target_pid, | ||
658 | "profile events on existing pid"), | ||
659 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | ||
660 | "system-wide collection from all CPUs"), | ||
661 | OPT_INTEGER('C', "CPU", &profile_cpu, | ||
662 | "CPU to profile on"), | ||
663 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, | ||
664 | "number of mmap data pages"), | ||
665 | OPT_INTEGER('r', "realtime", &realtime_prio, | ||
666 | "collect data with this RT SCHED_FIFO priority"), | ||
667 | OPT_INTEGER('d', "delay", &delay_secs, | ||
668 | "number of seconds to delay between refreshes"), | ||
669 | OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, | ||
670 | "dump the symbol table used for profiling"), | ||
671 | OPT_INTEGER('f', "count-filter", &count_filter, | ||
672 | "only display functions with more events than this"), | ||
673 | OPT_BOOLEAN('g', "group", &group, | ||
674 | "put the counters into a counter group"), | ||
675 | OPT_STRING('s', "sym-filter", &sym_filter, "pattern", | ||
676 | "only display symbols matchig this pattern"), | ||
677 | OPT_BOOLEAN('z', "zero", &group, | ||
678 | "zero history across updates"), | ||
679 | OPT_INTEGER('F', "freq", &freq, | ||
680 | "profile at this frequency"), | ||
681 | OPT_INTEGER('E', "entries", &print_entries, | ||
682 | "display this many functions"), | ||
683 | OPT_BOOLEAN('v', "verbose", &verbose, | ||
684 | "be more verbose (show counter open errors, etc)"), | ||
685 | OPT_END() | ||
686 | }; | ||
687 | |||
688 | int cmd_top(int argc, const char **argv, const char *prefix) | ||
689 | { | ||
690 | int counter; | ||
691 | |||
692 | page_size = sysconf(_SC_PAGE_SIZE); | ||
693 | |||
694 | argc = parse_options(argc, argv, options, top_usage, 0); | ||
695 | if (argc) | ||
696 | usage_with_options(top_usage, options); | ||
697 | |||
698 | if (freq) { | ||
699 | default_interval = freq; | ||
700 | freq = 1; | ||
701 | } | ||
702 | |||
703 | /* CPU and PID are mutually exclusive */ | ||
704 | if (target_pid != -1 && profile_cpu != -1) { | ||
705 | printf("WARNING: PID switch overriding CPU\n"); | ||
706 | sleep(1); | ||
707 | profile_cpu = -1; | ||
708 | } | ||
709 | |||
710 | if (!nr_counters) | ||
711 | nr_counters = 1; | ||
712 | |||
713 | if (delay_secs < 1) | ||
714 | delay_secs = 1; | ||
715 | |||
716 | parse_symbols(); | ||
717 | |||
718 | /* | ||
719 | * Fill in the ones not specifically initialized via -c: | ||
720 | */ | ||
721 | for (counter = 0; counter < nr_counters; counter++) { | ||
722 | if (attrs[counter].sample_period) | ||
723 | continue; | ||
724 | |||
725 | attrs[counter].sample_period = default_interval; | ||
726 | } | ||
727 | |||
728 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
729 | assert(nr_cpus <= MAX_NR_CPUS); | ||
730 | assert(nr_cpus >= 0); | ||
731 | |||
732 | if (target_pid != -1 || profile_cpu != -1) | ||
733 | nr_cpus = 1; | ||
734 | |||
735 | return __cmd_top(); | ||
736 | } | ||
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h new file mode 100644 index 000000000000..51d168230ee7 --- /dev/null +++ b/tools/perf/builtin.h | |||
@@ -0,0 +1,26 @@ | |||
1 | #ifndef BUILTIN_H | ||
2 | #define BUILTIN_H | ||
3 | |||
4 | #include "util/util.h" | ||
5 | #include "util/strbuf.h" | ||
6 | |||
7 | extern const char perf_version_string[]; | ||
8 | extern const char perf_usage_string[]; | ||
9 | extern const char perf_more_info_string[]; | ||
10 | |||
11 | extern void list_common_cmds_help(void); | ||
12 | extern const char *help_unknown_cmd(const char *cmd); | ||
13 | extern void prune_packed_objects(int); | ||
14 | extern int read_line_with_nul(char *buf, int size, FILE *file); | ||
15 | extern int check_pager_config(const char *cmd); | ||
16 | |||
17 | extern int cmd_annotate(int argc, const char **argv, const char *prefix); | ||
18 | extern int cmd_help(int argc, const char **argv, const char *prefix); | ||
19 | extern int cmd_record(int argc, const char **argv, const char *prefix); | ||
20 | extern int cmd_report(int argc, const char **argv, const char *prefix); | ||
21 | extern int cmd_stat(int argc, const char **argv, const char *prefix); | ||
22 | extern int cmd_top(int argc, const char **argv, const char *prefix); | ||
23 | extern int cmd_version(int argc, const char **argv, const char *prefix); | ||
24 | extern int cmd_list(int argc, const char **argv, const char *prefix); | ||
25 | |||
26 | #endif | ||
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt new file mode 100644 index 000000000000..eebce30afbc0 --- /dev/null +++ b/tools/perf/command-list.txt | |||
@@ -0,0 +1,10 @@ | |||
1 | # | ||
2 | # List of known perf commands. | ||
3 | # command name category [deprecated] [common] | ||
4 | # | ||
5 | perf-annotate mainporcelain common | ||
6 | perf-list mainporcelain common | ||
7 | perf-record mainporcelain common | ||
8 | perf-report mainporcelain common | ||
9 | perf-stat mainporcelain common | ||
10 | perf-top mainporcelain common | ||
diff --git a/tools/perf/design.txt b/tools/perf/design.txt new file mode 100644 index 000000000000..860e116d979c --- /dev/null +++ b/tools/perf/design.txt | |||
@@ -0,0 +1,442 @@ | |||
1 | |||
2 | Performance Counters for Linux | ||
3 | ------------------------------ | ||
4 | |||
5 | Performance counters are special hardware registers available on most modern | ||
6 | CPUs. These registers count the number of certain types of hw events: such | ||
7 | as instructions executed, cache misses suffered, or branches mis-predicted - | ||
8 | without slowing down the kernel or applications. These registers can also | ||
9 | trigger interrupts when a threshold number of events have passed - and can | ||
10 | thus be used to profile the code that runs on that CPU. | ||
11 | |||
12 | The Linux Performance Counter subsystem provides an abstraction of these | ||
13 | hardware capabilities. It provides per task and per CPU counters, counter | ||
14 | groups, and it provides event capabilities on top of those. It | ||
15 | provides "virtual" 64-bit counters, regardless of the width of the | ||
16 | underlying hardware counters. | ||
17 | |||
18 | Performance counters are accessed via special file descriptors. | ||
19 | There's one file descriptor per virtual counter used. | ||
20 | |||
21 | The special file descriptor is opened via the perf_counter_open() | ||
22 | system call: | ||
23 | |||
24 | int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, | ||
25 | pid_t pid, int cpu, int group_fd, | ||
26 | unsigned long flags); | ||
27 | |||
28 | The syscall returns the new fd. The fd can be used via the normal | ||
29 | VFS system calls: read() can be used to read the counter, fcntl() | ||
30 | can be used to set the blocking mode, etc. | ||
31 | |||
32 | Multiple counters can be kept open at a time, and the counters | ||
33 | can be poll()ed. | ||
34 | |||
35 | When creating a new counter fd, 'perf_counter_hw_event' is: | ||
36 | |||
37 | struct perf_counter_hw_event { | ||
38 | /* | ||
39 | * The MSB of the config word signifies if the rest contains cpu | ||
40 | * specific (raw) counter configuration data, if unset, the next | ||
41 | * 7 bits are an event type and the rest of the bits are the event | ||
42 | * identifier. | ||
43 | */ | ||
44 | __u64 config; | ||
45 | |||
46 | __u64 irq_period; | ||
47 | __u32 record_type; | ||
48 | __u32 read_format; | ||
49 | |||
50 | __u64 disabled : 1, /* off by default */ | ||
51 | inherit : 1, /* children inherit it */ | ||
52 | pinned : 1, /* must always be on PMU */ | ||
53 | exclusive : 1, /* only group on PMU */ | ||
54 | exclude_user : 1, /* don't count user */ | ||
55 | exclude_kernel : 1, /* ditto kernel */ | ||
56 | exclude_hv : 1, /* ditto hypervisor */ | ||
57 | exclude_idle : 1, /* don't count when idle */ | ||
58 | mmap : 1, /* include mmap data */ | ||
59 | munmap : 1, /* include munmap data */ | ||
60 | comm : 1, /* include comm data */ | ||
61 | |||
62 | __reserved_1 : 53; | ||
63 | |||
64 | __u32 extra_config_len; | ||
65 | __u32 wakeup_events; /* wakeup every n events */ | ||
66 | |||
67 | __u64 __reserved_2; | ||
68 | __u64 __reserved_3; | ||
69 | }; | ||
70 | |||
71 | The 'config' field specifies what the counter should count. It | ||
72 | is divided into 3 bit-fields: | ||
73 | |||
74 | raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 | ||
75 | type: 7 bits (next most significant) 0x7f00_0000_0000_0000 | ||
76 | event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff | ||
77 | |||
78 | If 'raw_type' is 1, then the counter will count a hardware event | ||
79 | specified by the remaining 63 bits of 'config'. The encoding is | ||
80 | machine-specific. | ||
81 | |||
82 | If 'raw_type' is 0, then the 'type' field says what kind of counter | ||
83 | this is, with the following encoding: | ||
84 | |||
85 | enum perf_event_types { | ||
86 | PERF_TYPE_HARDWARE = 0, | ||
87 | PERF_TYPE_SOFTWARE = 1, | ||
88 | PERF_TYPE_TRACEPOINT = 2, | ||
89 | }; | ||
90 | |||
91 | A counter of PERF_TYPE_HARDWARE will count the hardware event | ||
92 | specified by 'event_id': | ||
93 | |||
94 | /* | ||
95 | * Generalized performance counter event types, used by the hw_event.event_id | ||
96 | * parameter of the sys_perf_counter_open() syscall: | ||
97 | */ | ||
98 | enum hw_event_ids { | ||
99 | /* | ||
100 | * Common hardware events, generalized by the kernel: | ||
101 | */ | ||
102 | PERF_COUNT_HW_CPU_CYCLES = 0, | ||
103 | PERF_COUNT_HW_INSTRUCTIONS = 1, | ||
104 | PERF_COUNT_HW_CACHE_REFERENCES = 2, | ||
105 | PERF_COUNT_HW_CACHE_MISSES = 3, | ||
106 | PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, | ||
107 | PERF_COUNT_HW_BRANCH_MISSES = 5, | ||
108 | PERF_COUNT_HW_BUS_CYCLES = 6, | ||
109 | }; | ||
110 | |||
111 | These are standardized types of events that work relatively uniformly | ||
112 | on all CPUs that implement Performance Counters support under Linux, | ||
113 | although there may be variations (e.g., different CPUs might count | ||
114 | cache references and misses at different levels of the cache hierarchy). | ||
115 | If a CPU is not able to count the selected event, then the system call | ||
116 | will return -EINVAL. | ||
117 | |||
118 | More hw_event_types are supported as well, but they are CPU-specific | ||
119 | and accessed as raw events. For example, to count "External bus | ||
120 | cycles while bus lock signal asserted" events on Intel Core CPUs, pass | ||
121 | in a 0x4064 event_id value and set hw_event.raw_type to 1. | ||
122 | |||
123 | A counter of type PERF_TYPE_SOFTWARE will count one of the available | ||
124 | software events, selected by 'event_id': | ||
125 | |||
126 | /* | ||
127 | * Special "software" counters provided by the kernel, even if the hardware | ||
128 | * does not support performance counters. These counters measure various | ||
129 | * physical and sw events of the kernel (and allow the profiling of them as | ||
130 | * well): | ||
131 | */ | ||
132 | enum sw_event_ids { | ||
133 | PERF_COUNT_SW_CPU_CLOCK = 0, | ||
134 | PERF_COUNT_SW_TASK_CLOCK = 1, | ||
135 | PERF_COUNT_SW_PAGE_FAULTS = 2, | ||
136 | PERF_COUNT_SW_CONTEXT_SWITCHES = 3, | ||
137 | PERF_COUNT_SW_CPU_MIGRATIONS = 4, | ||
138 | PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, | ||
139 | PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, | ||
140 | }; | ||
141 | |||
142 | Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event | ||
143 | tracer is available, and event_id values can be obtained from | ||
144 | /debug/tracing/events/*/*/id | ||
145 | |||
146 | |||
147 | Counters come in two flavours: counting counters and sampling | ||
148 | counters. A "counting" counter is one that is used for counting the | ||
149 | number of events that occur, and is characterised by having | ||
150 | irq_period = 0. | ||
151 | |||
152 | |||
153 | A read() on a counter returns the current value of the counter and possibly | ||
154 | additional values as specified by 'read_format'; each value is a u64 (8 bytes) | ||
155 | in size. | ||
156 | |||
157 | /* | ||
158 | * Bits that can be set in hw_event.read_format to request that | ||
159 | * reads on the counter should return the indicated quantities, | ||
160 | * in increasing order of bit value, after the counter value. | ||
161 | */ | ||
162 | enum perf_counter_read_format { | ||
163 | PERF_FORMAT_TOTAL_TIME_ENABLED = 1, | ||
164 | PERF_FORMAT_TOTAL_TIME_RUNNING = 2, | ||
165 | }; | ||
166 | |||
167 | Using these additional values one can establish the overcommit ratio for a | ||
168 | particular counter allowing one to take the round-robin scheduling effect | ||
169 | into account. | ||
170 | |||
171 | |||
172 | A "sampling" counter is one that is set up to generate an interrupt | ||
173 | every N events, where N is given by 'irq_period'. A sampling counter | ||
174 | has irq_period > 0. The record_type controls what data is recorded on each | ||
175 | interrupt: | ||
176 | |||
177 | /* | ||
178 | * Bits that can be set in hw_event.record_type to request information | ||
179 | * in the overflow packets. | ||
180 | */ | ||
181 | enum perf_counter_record_format { | ||
182 | PERF_RECORD_IP = 1U << 0, | ||
183 | PERF_RECORD_TID = 1U << 1, | ||
184 | PERF_RECORD_TIME = 1U << 2, | ||
185 | PERF_RECORD_ADDR = 1U << 3, | ||
186 | PERF_RECORD_GROUP = 1U << 4, | ||
187 | PERF_RECORD_CALLCHAIN = 1U << 5, | ||
188 | }; | ||
189 | |||
190 | Such (and other) events will be recorded in a ring-buffer, which is | ||
191 | available to user-space using mmap() (see below). | ||
192 | |||
193 | The 'disabled' bit specifies whether the counter starts out disabled | ||
194 | or enabled. If it is initially disabled, it can be enabled by ioctl | ||
195 | or prctl (see below). | ||
196 | |||
197 | The 'inherit' bit, if set, specifies that this counter should count | ||
198 | events on descendant tasks as well as the task specified. This only | ||
199 | applies to new descendants, not to any existing descendants at the | ||
200 | time the counter is created (nor to any new descendants of existing | ||
201 | descendants). | ||
202 | |||
203 | The 'pinned' bit, if set, specifies that the counter should always be | ||
204 | on the CPU if at all possible. It only applies to hardware counters | ||
205 | and only to group leaders. If a pinned counter cannot be put onto the | ||
206 | CPU (e.g. because there are not enough hardware counters or because of | ||
207 | a conflict with some other event), then the counter goes into an | ||
208 | 'error' state, where reads return end-of-file (i.e. read() returns 0) | ||
209 | until the counter is subsequently enabled or disabled. | ||
210 | |||
211 | The 'exclusive' bit, if set, specifies that when this counter's group | ||
212 | is on the CPU, it should be the only group using the CPU's counters. | ||
213 | In future, this will allow sophisticated monitoring programs to supply | ||
214 | extra configuration information via 'extra_config_len' to exploit | ||
215 | advanced features of the CPU's Performance Monitor Unit (PMU) that are | ||
216 | not otherwise accessible and that might disrupt other hardware | ||
217 | counters. | ||
218 | |||
219 | The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a | ||
220 | way to request that counting of events be restricted to times when the | ||
221 | CPU is in user, kernel and/or hypervisor mode. | ||
222 | |||
223 | The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap | ||
224 | operations. These can be used to relate userspace IP addresses to actual | ||
225 | code, even after the mapping (or even the whole process) is gone. | ||
226 | These events are recorded in the ring-buffer (see below). | ||
227 | |||
228 | The 'comm' bit allows tracking of process comm data on process creation. | ||
229 | This too is recorded in the ring-buffer (see below). | ||
230 | |||
231 | The 'pid' parameter to the perf_counter_open() system call allows the | ||
232 | counter to be specific to a task: | ||
233 | |||
234 | pid == 0: if the pid parameter is zero, the counter is attached to the | ||
235 | current task. | ||
236 | |||
237 | pid > 0: the counter is attached to a specific task (if the current task | ||
238 | has sufficient privilege to do so) | ||
239 | |||
240 | pid < 0: all tasks are counted (per cpu counters) | ||
241 | |||
242 | The 'cpu' parameter allows a counter to be made specific to a CPU: | ||
243 | |||
244 | cpu >= 0: the counter is restricted to a specific CPU | ||
245 | cpu == -1: the counter counts on all CPUs | ||
246 | |||
247 | (Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) | ||
248 | |||
249 | A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts | ||
250 | events of that task and 'follows' that task to whatever CPU the task | ||
251 | gets scheduled to. Per task counters can be created by any user, for | ||
252 | their own tasks. | ||
253 | |||
254 | A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts | ||
255 | all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. | ||
256 | |||
257 | The 'flags' parameter is currently unused and must be zero. | ||
258 | |||
259 | The 'group_fd' parameter allows counter "groups" to be set up. A | ||
260 | counter group has one counter which is the group "leader". The leader | ||
261 | is created first, with group_fd = -1 in the perf_counter_open call | ||
262 | that creates it. The rest of the group members are created | ||
263 | subsequently, with group_fd giving the fd of the group leader. | ||
264 | (A single counter on its own is created with group_fd = -1 and is | ||
265 | considered to be a group with only 1 member.) | ||
266 | |||
267 | A counter group is scheduled onto the CPU as a unit, that is, it will | ||
268 | only be put onto the CPU if all of the counters in the group can be | ||
269 | put onto the CPU. This means that the values of the member counters | ||
270 | can be meaningfully compared, added, divided (to get ratios), etc., | ||
271 | with each other, since they have counted events for the same set of | ||
272 | executed instructions. | ||
273 | |||
274 | |||
275 | As stated above, asynchronous events, such as counter overflow or PROT_EXEC mmap | ||
276 | tracking, are logged into a ring-buffer. This ring-buffer is created and | ||
277 | accessed through mmap(). | ||
278 | |||
279 | The mmap size should be 1+2^n pages, where the first page is a meta-data page | ||
280 | (struct perf_counter_mmap_page) that contains various bits of information such | ||
281 | as where the ring-buffer head is. | ||
282 | |||
283 | /* | ||
284 | * Structure of the page that can be mapped via mmap | ||
285 | */ | ||
286 | struct perf_counter_mmap_page { | ||
287 | __u32 version; /* version number of this structure */ | ||
288 | __u32 compat_version; /* lowest version this is compat with */ | ||
289 | |||
290 | /* | ||
291 | * Bits needed to read the hw counters in user-space. | ||
292 | * | ||
293 | * u32 seq; | ||
294 | * s64 count; | ||
295 | * | ||
296 | * do { | ||
297 | * seq = pc->lock; | ||
298 | * | ||
299 | * barrier(); | ||
300 | * if (pc->index) { | ||
301 | * count = pmc_read(pc->index - 1); | ||
302 | * count += pc->offset; | ||
303 | * } else | ||
304 | * goto regular_read; | ||
305 | * | ||
306 | * barrier(); | ||
307 | * } while (pc->lock != seq); | ||
308 | * | ||
309 | * NOTE: for obvious reasons this only works on self-monitoring | ||
310 | * processes. | ||
311 | */ | ||
312 | __u32 lock; /* seqlock for synchronization */ | ||
313 | __u32 index; /* hardware counter identifier */ | ||
314 | __s64 offset; /* add to hardware counter value */ | ||
315 | |||
316 | /* | ||
317 | * Control data for the mmap() data buffer. | ||
318 | * | ||
319 | * User-space reading this value should issue an rmb(), on SMP capable | ||
320 | * platforms, after reading this value -- see perf_counter_wakeup(). | ||
321 | */ | ||
322 | __u32 data_head; /* head in the data section */ | ||
323 | }; | ||
324 | |||
325 | NOTE: the hw-counter userspace bits are arch specific and are currently only | ||
326 | implemented on powerpc. | ||
327 | |||
328 | The following 2^n pages are the ring-buffer which contains events of the form: | ||
329 | |||
330 | #define PERF_EVENT_MISC_KERNEL (1 << 0) | ||
331 | #define PERF_EVENT_MISC_USER (1 << 1) | ||
332 | #define PERF_EVENT_MISC_OVERFLOW (1 << 2) | ||
333 | |||
334 | struct perf_event_header { | ||
335 | __u32 type; | ||
336 | __u16 misc; | ||
337 | __u16 size; | ||
338 | }; | ||
339 | |||
340 | enum perf_event_type { | ||
341 | |||
342 | /* | ||
343 | * The MMAP events record the PROT_EXEC mappings so that we can | ||
344 | * correlate userspace IPs to code. They have the following structure: | ||
345 | * | ||
346 | * struct { | ||
347 | * struct perf_event_header header; | ||
348 | * | ||
349 | * u32 pid, tid; | ||
350 | * u64 addr; | ||
351 | * u64 len; | ||
352 | * u64 pgoff; | ||
353 | * char filename[]; | ||
354 | * }; | ||
355 | */ | ||
356 | PERF_EVENT_MMAP = 1, | ||
357 | PERF_EVENT_MUNMAP = 2, | ||
358 | |||
359 | /* | ||
360 | * struct { | ||
361 | * struct perf_event_header header; | ||
362 | * | ||
363 | * u32 pid, tid; | ||
364 | * char comm[]; | ||
365 | * }; | ||
366 | */ | ||
367 | PERF_EVENT_COMM = 3, | ||
368 | |||
369 | /* | ||
370 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | ||
371 | * will be PERF_RECORD_* | ||
372 | * | ||
373 | * struct { | ||
374 | * struct perf_event_header header; | ||
375 | * | ||
376 | * { u64 ip; } && PERF_RECORD_IP | ||
377 | * { u32 pid, tid; } && PERF_RECORD_TID | ||
378 | * { u64 time; } && PERF_RECORD_TIME | ||
379 | * { u64 addr; } && PERF_RECORD_ADDR | ||
380 | * | ||
381 | * { u64 nr; | ||
382 | * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP | ||
383 | * | ||
384 | * { u16 nr, | ||
385 | * hv, | ||
386 | * kernel, | ||
387 | * user; | ||
388 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | ||
389 | * }; | ||
390 | */ | ||
391 | }; | ||
392 | |||
393 | NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented | ||
394 | on x86. | ||
395 | |||
396 | Notification of new events is possible through poll()/select()/epoll() and | ||
397 | fcntl() managing signals. | ||
398 | |||
399 | Normally a notification is generated for every page filled, however one can | ||
400 | additionally set perf_counter_hw_event.wakeup_events to generate one every | ||
401 | so many counter overflow events. | ||
402 | |||
403 | Future work will include a splice() interface to the ring-buffer. | ||
404 | |||
405 | |||
406 | Counters can be enabled and disabled in two ways: via ioctl and via | ||
407 | prctl. When a counter is disabled, it doesn't count or generate | ||
408 | events but does continue to exist and maintain its count value. | ||
409 | |||
410 | An individual counter or counter group can be enabled with | ||
411 | |||
412 | ioctl(fd, PERF_COUNTER_IOC_ENABLE); | ||
413 | |||
414 | or disabled with | ||
415 | |||
416 | ioctl(fd, PERF_COUNTER_IOC_DISABLE); | ||
417 | |||
418 | Enabling or disabling the leader of a group enables or disables the | ||
419 | whole group; that is, while the group leader is disabled, none of the | ||
420 | counters in the group will count. Enabling or disabling a member of a | ||
421 | group other than the leader only affects that counter - disabling a | ||
422 | non-leader stops that counter from counting but doesn't affect any | ||
423 | other counter. | ||
424 | |||
425 | Additionally, non-inherited overflow counters can use | ||
426 | |||
427 | ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr); | ||
428 | |||
429 | to enable a counter for 'nr' events, after which it gets disabled again. | ||
430 | |||
431 | A process can enable or disable all the counter groups that are | ||
432 | attached to it, using prctl: | ||
433 | |||
434 | prctl(PR_TASK_PERF_COUNTERS_ENABLE); | ||
435 | |||
436 | prctl(PR_TASK_PERF_COUNTERS_DISABLE); | ||
437 | |||
438 | This applies to all counters on the current process, whether created | ||
439 | by this process or by another, and doesn't affect any counters that | ||
440 | this process has created on other processes. It only enables or | ||
441 | disables the group leaders, not any other members in the groups. | ||
442 | |||
diff --git a/tools/perf/perf.c b/tools/perf/perf.c new file mode 100644 index 000000000000..4eb725933703 --- /dev/null +++ b/tools/perf/perf.c | |||
@@ -0,0 +1,428 @@ | |||
1 | /* | ||
2 | * perf.c | ||
3 | * | ||
4 | * Performance analysis utility. | ||
5 | * | ||
6 | * This is the main hub from which the sub-commands (perf stat, | ||
7 | * perf top, perf record, perf report, etc.) are started. | ||
8 | */ | ||
9 | #include "builtin.h" | ||
10 | |||
11 | #include "util/exec_cmd.h" | ||
12 | #include "util/cache.h" | ||
13 | #include "util/quote.h" | ||
14 | #include "util/run-command.h" | ||
15 | |||
16 | const char perf_usage_string[] = | ||
17 | "perf [--version] [--help] COMMAND [ARGS]"; | ||
18 | |||
19 | const char perf_more_info_string[] = | ||
20 | "See 'perf help COMMAND' for more information on a specific command."; | ||
21 | |||
22 | static int use_pager = -1; | ||
23 | struct pager_config { | ||
24 | const char *cmd; | ||
25 | int val; | ||
26 | }; | ||
27 | |||
28 | static int pager_command_config(const char *var, const char *value, void *data) | ||
29 | { | ||
30 | struct pager_config *c = data; | ||
31 | if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) | ||
32 | c->val = perf_config_bool(var, value); | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | /* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ | ||
37 | int check_pager_config(const char *cmd) | ||
38 | { | ||
39 | struct pager_config c; | ||
40 | c.cmd = cmd; | ||
41 | c.val = -1; | ||
42 | perf_config(pager_command_config, &c); | ||
43 | return c.val; | ||
44 | } | ||
45 | |||
46 | static void commit_pager_choice(void) { | ||
47 | switch (use_pager) { | ||
48 | case 0: | ||
49 | setenv("PERF_PAGER", "cat", 1); | ||
50 | break; | ||
51 | case 1: | ||
52 | /* setup_pager(); */ | ||
53 | break; | ||
54 | default: | ||
55 | break; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | static int handle_options(const char*** argv, int* argc, int* envchanged) | ||
60 | { | ||
61 | int handled = 0; | ||
62 | |||
63 | while (*argc > 0) { | ||
64 | const char *cmd = (*argv)[0]; | ||
65 | if (cmd[0] != '-') | ||
66 | break; | ||
67 | |||
68 | /* | ||
69 | * For legacy reasons, the "version" and "help" | ||
70 | * commands can be written with "--" prepended | ||
71 | * to make them look like flags. | ||
72 | */ | ||
73 | if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) | ||
74 | break; | ||
75 | |||
76 | /* | ||
77 | * Check remaining flags. | ||
78 | */ | ||
79 | if (!prefixcmp(cmd, "--exec-path")) { | ||
80 | cmd += 11; | ||
81 | if (*cmd == '=') | ||
82 | perf_set_argv_exec_path(cmd + 1); | ||
83 | else { | ||
84 | puts(perf_exec_path()); | ||
85 | exit(0); | ||
86 | } | ||
87 | } else if (!strcmp(cmd, "--html-path")) { | ||
88 | puts(system_path(PERF_HTML_PATH)); | ||
89 | exit(0); | ||
90 | } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { | ||
91 | use_pager = 1; | ||
92 | } else if (!strcmp(cmd, "--no-pager")) { | ||
93 | use_pager = 0; | ||
94 | if (envchanged) | ||
95 | *envchanged = 1; | ||
96 | } else if (!strcmp(cmd, "--perf-dir")) { | ||
97 | if (*argc < 2) { | ||
98 | fprintf(stderr, "No directory given for --perf-dir.\n" ); | ||
99 | usage(perf_usage_string); | ||
100 | } | ||
101 | setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); | ||
102 | if (envchanged) | ||
103 | *envchanged = 1; | ||
104 | (*argv)++; | ||
105 | (*argc)--; | ||
106 | handled++; | ||
107 | } else if (!prefixcmp(cmd, "--perf-dir=")) { | ||
108 | setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); | ||
109 | if (envchanged) | ||
110 | *envchanged = 1; | ||
111 | } else if (!strcmp(cmd, "--work-tree")) { | ||
112 | if (*argc < 2) { | ||
113 | fprintf(stderr, "No directory given for --work-tree.\n" ); | ||
114 | usage(perf_usage_string); | ||
115 | } | ||
116 | setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); | ||
117 | if (envchanged) | ||
118 | *envchanged = 1; | ||
119 | (*argv)++; | ||
120 | (*argc)--; | ||
121 | } else if (!prefixcmp(cmd, "--work-tree=")) { | ||
122 | setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); | ||
123 | if (envchanged) | ||
124 | *envchanged = 1; | ||
125 | } else { | ||
126 | fprintf(stderr, "Unknown option: %s\n", cmd); | ||
127 | usage(perf_usage_string); | ||
128 | } | ||
129 | |||
130 | (*argv)++; | ||
131 | (*argc)--; | ||
132 | handled++; | ||
133 | } | ||
134 | return handled; | ||
135 | } | ||
136 | |||
137 | static int handle_alias(int *argcp, const char ***argv) | ||
138 | { | ||
139 | int envchanged = 0, ret = 0, saved_errno = errno; | ||
140 | int count, option_count; | ||
141 | const char** new_argv; | ||
142 | const char *alias_command; | ||
143 | char *alias_string; | ||
144 | |||
145 | alias_command = (*argv)[0]; | ||
146 | alias_string = alias_lookup(alias_command); | ||
147 | if (alias_string) { | ||
148 | if (alias_string[0] == '!') { | ||
149 | if (*argcp > 1) { | ||
150 | struct strbuf buf; | ||
151 | |||
152 | strbuf_init(&buf, PATH_MAX); | ||
153 | strbuf_addstr(&buf, alias_string); | ||
154 | sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); | ||
155 | free(alias_string); | ||
156 | alias_string = buf.buf; | ||
157 | } | ||
158 | ret = system(alias_string + 1); | ||
159 | if (ret >= 0 && WIFEXITED(ret) && | ||
160 | WEXITSTATUS(ret) != 127) | ||
161 | exit(WEXITSTATUS(ret)); | ||
162 | die("Failed to run '%s' when expanding alias '%s'", | ||
163 | alias_string + 1, alias_command); | ||
164 | } | ||
165 | count = split_cmdline(alias_string, &new_argv); | ||
166 | if (count < 0) | ||
167 | die("Bad alias.%s string", alias_command); | ||
168 | option_count = handle_options(&new_argv, &count, &envchanged); | ||
169 | if (envchanged) | ||
170 | die("alias '%s' changes environment variables\n" | ||
171 | "You can use '!perf' in the alias to do this.", | ||
172 | alias_command); | ||
173 | memmove(new_argv - option_count, new_argv, | ||
174 | count * sizeof(char *)); | ||
175 | new_argv -= option_count; | ||
176 | |||
177 | if (count < 1) | ||
178 | die("empty alias for %s", alias_command); | ||
179 | |||
180 | if (!strcmp(alias_command, new_argv[0])) | ||
181 | die("recursive alias: %s", alias_command); | ||
182 | |||
183 | new_argv = realloc(new_argv, sizeof(char*) * | ||
184 | (count + *argcp + 1)); | ||
185 | /* insert after command name */ | ||
186 | memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); | ||
187 | new_argv[count+*argcp] = NULL; | ||
188 | |||
189 | *argv = new_argv; | ||
190 | *argcp += count - 1; | ||
191 | |||
192 | ret = 1; | ||
193 | } | ||
194 | |||
195 | errno = saved_errno; | ||
196 | |||
197 | return ret; | ||
198 | } | ||
199 | |||
200 | const char perf_version_string[] = PERF_VERSION; | ||
201 | |||
202 | #define RUN_SETUP (1<<0) | ||
203 | #define USE_PAGER (1<<1) | ||
204 | /* | ||
205 | * require working tree to be present -- anything uses this needs | ||
206 | * RUN_SETUP for reading from the configuration file. | ||
207 | */ | ||
208 | #define NEED_WORK_TREE (1<<2) | ||
209 | |||
210 | struct cmd_struct { | ||
211 | const char *cmd; | ||
212 | int (*fn)(int, const char **, const char *); | ||
213 | int option; | ||
214 | }; | ||
215 | |||
216 | static int run_builtin(struct cmd_struct *p, int argc, const char **argv) | ||
217 | { | ||
218 | int status; | ||
219 | struct stat st; | ||
220 | const char *prefix; | ||
221 | |||
222 | prefix = NULL; | ||
223 | if (p->option & RUN_SETUP) | ||
224 | prefix = NULL; /* setup_perf_directory(); */ | ||
225 | |||
226 | if (use_pager == -1 && p->option & RUN_SETUP) | ||
227 | use_pager = check_pager_config(p->cmd); | ||
228 | if (use_pager == -1 && p->option & USE_PAGER) | ||
229 | use_pager = 1; | ||
230 | commit_pager_choice(); | ||
231 | |||
232 | if (p->option & NEED_WORK_TREE) | ||
233 | /* setup_work_tree() */; | ||
234 | |||
235 | status = p->fn(argc, argv, prefix); | ||
236 | if (status) | ||
237 | return status & 0xff; | ||
238 | |||
239 | /* Somebody closed stdout? */ | ||
240 | if (fstat(fileno(stdout), &st)) | ||
241 | return 0; | ||
242 | /* Ignore write errors for pipes and sockets.. */ | ||
243 | if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) | ||
244 | return 0; | ||
245 | |||
246 | /* Check for ENOSPC and EIO errors.. */ | ||
247 | if (fflush(stdout)) | ||
248 | die("write failure on standard output: %s", strerror(errno)); | ||
249 | if (ferror(stdout)) | ||
250 | die("unknown write failure on standard output"); | ||
251 | if (fclose(stdout)) | ||
252 | die("close failed on standard output: %s", strerror(errno)); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | static void handle_internal_command(int argc, const char **argv) | ||
257 | { | ||
258 | const char *cmd = argv[0]; | ||
259 | static struct cmd_struct commands[] = { | ||
260 | { "help", cmd_help, 0 }, | ||
261 | { "list", cmd_list, 0 }, | ||
262 | { "record", cmd_record, 0 }, | ||
263 | { "report", cmd_report, 0 }, | ||
264 | { "stat", cmd_stat, 0 }, | ||
265 | { "top", cmd_top, 0 }, | ||
266 | { "annotate", cmd_annotate, 0 }, | ||
267 | { "version", cmd_version, 0 }, | ||
268 | }; | ||
269 | int i; | ||
270 | static const char ext[] = STRIP_EXTENSION; | ||
271 | |||
272 | if (sizeof(ext) > 1) { | ||
273 | i = strlen(argv[0]) - strlen(ext); | ||
274 | if (i > 0 && !strcmp(argv[0] + i, ext)) { | ||
275 | char *argv0 = strdup(argv[0]); | ||
276 | argv[0] = cmd = argv0; | ||
277 | argv0[i] = '\0'; | ||
278 | } | ||
279 | } | ||
280 | |||
281 | /* Turn "perf cmd --help" into "perf help cmd" */ | ||
282 | if (argc > 1 && !strcmp(argv[1], "--help")) { | ||
283 | argv[1] = argv[0]; | ||
284 | argv[0] = cmd = "help"; | ||
285 | } | ||
286 | |||
287 | for (i = 0; i < ARRAY_SIZE(commands); i++) { | ||
288 | struct cmd_struct *p = commands+i; | ||
289 | if (strcmp(p->cmd, cmd)) | ||
290 | continue; | ||
291 | exit(run_builtin(p, argc, argv)); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | static void execv_dashed_external(const char **argv) | ||
296 | { | ||
297 | struct strbuf cmd = STRBUF_INIT; | ||
298 | const char *tmp; | ||
299 | int status; | ||
300 | |||
301 | strbuf_addf(&cmd, "perf-%s", argv[0]); | ||
302 | |||
303 | /* | ||
304 | * argv[0] must be the perf command, but the argv array | ||
305 | * belongs to the caller, and may be reused in | ||
306 | * subsequent loop iterations. Save argv[0] and | ||
307 | * restore it on error. | ||
308 | */ | ||
309 | tmp = argv[0]; | ||
310 | argv[0] = cmd.buf; | ||
311 | |||
312 | /* | ||
313 | * if we fail because the command is not found, it is | ||
314 | * OK to return. Otherwise, we just pass along the status code. | ||
315 | */ | ||
316 | status = run_command_v_opt(argv, 0); | ||
317 | if (status != -ERR_RUN_COMMAND_EXEC) { | ||
318 | if (IS_RUN_COMMAND_ERR(status)) | ||
319 | die("unable to run '%s'", argv[0]); | ||
320 | exit(-status); | ||
321 | } | ||
322 | errno = ENOENT; /* as if we called execvp */ | ||
323 | |||
324 | argv[0] = tmp; | ||
325 | |||
326 | strbuf_release(&cmd); | ||
327 | } | ||
328 | |||
/*
 * Try to run (*argv)[0]: first as a built-in, then as an external
 * "perf-<cmd>" program, and finally - at most once - as an alias, in which
 * case the expanded command line is tried again.
 *
 * Only returns when nothing could be run; the return value is 1 if an alias
 * expansion took place and 0 otherwise.
 */
static int run_argv(int *argcp, const char ***argv)
{
	int expanded = 0;

	for (;;) {
		/* See if it's an internal command */
		handle_internal_command(*argcp, *argv);

		/* .. then try the external ones */
		execv_dashed_external(*argv);

		/* It could be an alias -- this works around the insanity
		 * of overriding "perf log" with "perf show" by having
		 * alias.log = show
		 */
		if (expanded)
			break;
		if (!handle_alias(argcp, argv))
			break;
		expanded = 1;
	}

	return expanded;
}
351 | |||
352 | |||
353 | int main(int argc, const char **argv) | ||
354 | { | ||
355 | const char *cmd; | ||
356 | |||
357 | cmd = perf_extract_argv0_path(argv[0]); | ||
358 | if (!cmd) | ||
359 | cmd = "perf-help"; | ||
360 | |||
361 | /* | ||
362 | * "perf-xxxx" is the same as "perf xxxx", but we obviously: | ||
363 | * | ||
364 | * - cannot take flags in between the "perf" and the "xxxx". | ||
365 | * - cannot execute it externally (since it would just do | ||
366 | * the same thing over again) | ||
367 | * | ||
368 | * So we just directly call the internal command handler, and | ||
369 | * die if that one cannot handle it. | ||
370 | */ | ||
371 | if (!prefixcmp(cmd, "perf-")) { | ||
372 | cmd += 5; | ||
373 | argv[0] = cmd; | ||
374 | handle_internal_command(argc, argv); | ||
375 | die("cannot handle %s internally", cmd); | ||
376 | } | ||
377 | |||
378 | /* Look for flags.. */ | ||
379 | argv++; | ||
380 | argc--; | ||
381 | handle_options(&argv, &argc, NULL); | ||
382 | commit_pager_choice(); | ||
383 | if (argc > 0) { | ||
384 | if (!prefixcmp(argv[0], "--")) | ||
385 | argv[0] += 2; | ||
386 | } else { | ||
387 | /* The user didn't specify a command; give them help */ | ||
388 | printf("\n usage: %s\n\n", perf_usage_string); | ||
389 | list_common_cmds_help(); | ||
390 | printf("\n %s\n\n", perf_more_info_string); | ||
391 | exit(1); | ||
392 | } | ||
393 | cmd = argv[0]; | ||
394 | |||
395 | /* | ||
396 | * We use PATH to find perf commands, but we prepend some higher | ||
397 | * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH | ||
398 | * environment, and the $(perfexecdir) from the Makefile at build | ||
399 | * time. | ||
400 | */ | ||
401 | setup_path(); | ||
402 | |||
403 | while (1) { | ||
404 | static int done_help = 0; | ||
405 | static int was_alias = 0; | ||
406 | |||
407 | was_alias = run_argv(&argc, &argv); | ||
408 | if (errno != ENOENT) | ||
409 | break; | ||
410 | |||
411 | if (was_alias) { | ||
412 | fprintf(stderr, "Expansion of alias '%s' failed; " | ||
413 | "'%s' is not a perf-command\n", | ||
414 | cmd, argv[0]); | ||
415 | exit(1); | ||
416 | } | ||
417 | if (!done_help) { | ||
418 | cmd = argv[0] = help_unknown_cmd(cmd); | ||
419 | done_help = 1; | ||
420 | } else | ||
421 | break; | ||
422 | } | ||
423 | |||
424 | fprintf(stderr, "Failed to run command '%s': %s\n", | ||
425 | cmd, strerror(errno)); | ||
426 | |||
427 | return 1; | ||
428 | } | ||
diff --git a/tools/perf/perf.h b/tools/perf/perf.h new file mode 100644 index 000000000000..af0a5046d743 --- /dev/null +++ b/tools/perf/perf.h | |||
@@ -0,0 +1,67 @@ | |||
1 | #ifndef _PERF_PERF_H | ||
2 | #define _PERF_PERF_H | ||
3 | |||
4 | #if defined(__x86_64__) || defined(__i386__) | ||
5 | #include "../../arch/x86/include/asm/unistd.h" | ||
6 | #define rmb() asm volatile("lfence" ::: "memory") | ||
/*
 * No trailing ';' in the expansion: the caller writes "cpu_relax();",
 * which keeps the macro usable in an unbraced if/else.
 */
#define cpu_relax()	asm volatile("rep; nop" ::: "memory")
8 | #endif | ||
9 | |||
10 | #ifdef __powerpc__ | ||
11 | #include "../../arch/powerpc/include/asm/unistd.h" | ||
12 | #define rmb() asm volatile ("sync" ::: "memory") | ||
/*
 * No trailing ';' in the expansion: the caller writes "cpu_relax();",
 * which keeps the macro usable in an unbraced if/else.
 */
#define cpu_relax()	asm volatile ("" ::: "memory")
14 | #endif | ||
15 | |||
16 | #include <time.h> | ||
17 | #include <unistd.h> | ||
18 | #include <sys/types.h> | ||
19 | #include <sys/syscall.h> | ||
20 | |||
21 | #include "../../include/linux/perf_counter.h" | ||
22 | |||
23 | /* | ||
24 | * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all | ||
25 | * counters in the current task. | ||
26 | */ | ||
27 | #define PR_TASK_PERF_COUNTERS_DISABLE 31 | ||
28 | #define PR_TASK_PERF_COUNTERS_ENABLE 32 | ||
29 | |||
30 | #ifndef NSEC_PER_SEC | ||
31 | # define NSEC_PER_SEC 1000000000ULL | ||
32 | #endif | ||
33 | |||
/*
 * Read CLOCK_MONOTONIC and return it as a single nanosecond count.
 *
 * Returns 0 when clock_gettime() fails, so a failed call never reads
 * the uninitialised timespec.
 */
static inline unsigned long long rdclock(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0)
		return 0;

	return (unsigned long long)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
41 | |||
42 | /* | ||
43 | * Pick up some kernel type conventions: | ||
44 | */ | ||
45 | #define __user | ||
46 | #define asmlinkage | ||
47 | |||
48 | #define unlikely(x) __builtin_expect(!!(x), 0) | ||
/*
 * Minimum of x and y; each argument is evaluated exactly once.
 *
 * The pointer comparison is never used for its value: it only makes the
 * compiler complain when x and y have different types.
 *
 * __typeof__ is spelled with underscores so the macro also builds when
 * the compiler runs in a strict ISO mode (-std=c99, -std=c11), where the
 * plain "typeof" keyword is not available.
 */
#define min(x, y) ({				\
	__typeof__(x) _min1 = (x);		\
	__typeof__(y) _min2 = (y);		\
	(void) (&_min1 == &_min2);		\
	_min1 < _min2 ? _min1 : _min2; })
54 | |||
55 | static inline int | ||
56 | sys_perf_counter_open(struct perf_counter_attr *attr_uptr, | ||
57 | pid_t pid, int cpu, int group_fd, | ||
58 | unsigned long flags) | ||
59 | { | ||
60 | return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu, | ||
61 | group_fd, flags); | ||
62 | } | ||
63 | |||
64 | #define MAX_COUNTERS 256 | ||
65 | #define MAX_NR_CPUS 256 | ||
66 | |||
67 | #endif | ||
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN new file mode 100755 index 000000000000..c561d1538c03 --- /dev/null +++ b/tools/perf/util/PERF-VERSION-GEN | |||
@@ -0,0 +1,42 @@ | |||
#!/bin/sh
#
# Work out the perf version string and record it in $GVF as
# "PERF_VERSION = <version>".  $GVF is rewritten only when the computed
# version differs from what the file already holds.

GVF=PERF-VERSION-FILE
DEF_VER=v0.0.1.PERF

LF='
'

# First see if there is a version file (included in release tarballs),
# then try git-describe, then default.
if test -f version
then
	VN=$(cat version) || VN="$DEF_VER"
elif test -d .git -o -f .git &&
	VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
	case "$VN" in
	*$LF*) (exit 1) ;;
	v[0-9]*)
		git update-index -q --refresh
		test -z "$(git diff-index --name-only HEAD --)" ||
		VN="$VN-dirty" ;;
	esac
then
	VN=$(echo "$VN" | sed -e 's/-/./g');
else
	VN="$DEF_VER"
fi

# Strip any leading "v".  The whole pattern is quoted so the shell can
# never treat "v*" as a pathname glob.
VN=$(expr "$VN" : 'v*\(.*\)')

if test -r "$GVF"
then
	VC=$(sed -e 's/^PERF_VERSION = //' <"$GVF")
else
	VC=unset
fi
test "$VN" = "$VC" || {
	echo >&2 "PERF_VERSION = $VN"
	echo "PERF_VERSION = $VN" >"$GVF"
}
41 | |||
42 | |||
diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c new file mode 100644 index 000000000000..61d33b81fc97 --- /dev/null +++ b/tools/perf/util/abspath.c | |||
@@ -0,0 +1,117 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | /* | ||
4 | * Do not use this for inspecting *tracked* content. When path is a | ||
5 | * symlink to a directory, we do not want to say it is a directory when | ||
6 | * dealing with tracked content in the working tree. | ||
7 | */ | ||
/*
 * Report whether path names a directory.  The check goes through
 * stat(), so a symbolic link that points at a directory counts too.
 *
 * Returns 1 for a directory and 0 otherwise, including when stat()
 * fails.
 */
static int is_directory(const char *path)
{
	struct stat info;

	if (stat(path, &info) != 0)
		return 0;
	return S_ISDIR(info.st_mode) ? 1 : 0;
}
13 | |||
14 | /* We allow "recursive" symbolic links. Only within reason, though. */ | ||
15 | #define MAXDEPTH 5 | ||
16 | |||
17 | const char *make_absolute_path(const char *path) | ||
18 | { | ||
19 | static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; | ||
20 | char cwd[1024] = ""; | ||
21 | int buf_index = 1, len; | ||
22 | |||
23 | int depth = MAXDEPTH; | ||
24 | char *last_elem = NULL; | ||
25 | struct stat st; | ||
26 | |||
27 | if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) | ||
28 | die ("Too long path: %.*s", 60, path); | ||
29 | |||
30 | while (depth--) { | ||
31 | if (!is_directory(buf)) { | ||
32 | char *last_slash = strrchr(buf, '/'); | ||
33 | if (last_slash) { | ||
34 | *last_slash = '\0'; | ||
35 | last_elem = xstrdup(last_slash + 1); | ||
36 | } else { | ||
37 | last_elem = xstrdup(buf); | ||
38 | *buf = '\0'; | ||
39 | } | ||
40 | } | ||
41 | |||
42 | if (*buf) { | ||
43 | if (!*cwd && !getcwd(cwd, sizeof(cwd))) | ||
44 | die ("Could not get current working directory"); | ||
45 | |||
46 | if (chdir(buf)) | ||
47 | die ("Could not switch to '%s'", buf); | ||
48 | } | ||
49 | if (!getcwd(buf, PATH_MAX)) | ||
50 | die ("Could not get current working directory"); | ||
51 | |||
52 | if (last_elem) { | ||
53 | int len = strlen(buf); | ||
54 | if (len + strlen(last_elem) + 2 > PATH_MAX) | ||
55 | die ("Too long path name: '%s/%s'", | ||
56 | buf, last_elem); | ||
57 | buf[len] = '/'; | ||
58 | strcpy(buf + len + 1, last_elem); | ||
59 | free(last_elem); | ||
60 | last_elem = NULL; | ||
61 | } | ||
62 | |||
63 | if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { | ||
64 | len = readlink(buf, next_buf, PATH_MAX); | ||
65 | if (len < 0) | ||
66 | die ("Invalid symlink: %s", buf); | ||
67 | if (PATH_MAX <= len) | ||
68 | die("symbolic link too long: %s", buf); | ||
69 | next_buf[len] = '\0'; | ||
70 | buf = next_buf; | ||
71 | buf_index = 1 - buf_index; | ||
72 | next_buf = bufs[buf_index]; | ||
73 | } else | ||
74 | break; | ||
75 | } | ||
76 | |||
77 | if (*cwd && chdir(cwd)) | ||
78 | die ("Could not change back to '%s'", cwd); | ||
79 | |||
80 | return buf; | ||
81 | } | ||
82 | |||
83 | static const char *get_pwd_cwd(void) | ||
84 | { | ||
85 | static char cwd[PATH_MAX + 1]; | ||
86 | char *pwd; | ||
87 | struct stat cwd_stat, pwd_stat; | ||
88 | if (getcwd(cwd, PATH_MAX) == NULL) | ||
89 | return NULL; | ||
90 | pwd = getenv("PWD"); | ||
91 | if (pwd && strcmp(pwd, cwd)) { | ||
92 | stat(cwd, &cwd_stat); | ||
93 | if (!stat(pwd, &pwd_stat) && | ||
94 | pwd_stat.st_dev == cwd_stat.st_dev && | ||
95 | pwd_stat.st_ino == cwd_stat.st_ino) { | ||
96 | strlcpy(cwd, pwd, PATH_MAX); | ||
97 | } | ||
98 | } | ||
99 | return cwd; | ||
100 | } | ||
101 | |||
102 | const char *make_nonrelative_path(const char *path) | ||
103 | { | ||
104 | static char buf[PATH_MAX + 1]; | ||
105 | |||
106 | if (is_absolute_path(path)) { | ||
107 | if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) | ||
108 | die("Too long path: %.*s", 60, path); | ||
109 | } else { | ||
110 | const char *cwd = get_pwd_cwd(); | ||
111 | if (!cwd) | ||
112 | die("Cannot determine the current working directory"); | ||
113 | if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) | ||
114 | die("Too long path: %.*s", 60, path); | ||
115 | } | ||
116 | return buf; | ||
117 | } | ||
diff --git a/tools/perf/util/alias.c b/tools/perf/util/alias.c new file mode 100644 index 000000000000..9b3dd2b428df --- /dev/null +++ b/tools/perf/util/alias.c | |||
@@ -0,0 +1,77 @@ | |||
1 | #include "cache.h" | ||
2 | |||
3 | static const char *alias_key; | ||
4 | static char *alias_val; | ||
5 | |||
6 | static int alias_lookup_cb(const char *k, const char *v, void *cb) | ||
7 | { | ||
8 | if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { | ||
9 | if (!v) | ||
10 | return config_error_nonbool(k); | ||
11 | alias_val = strdup(v); | ||
12 | return 0; | ||
13 | } | ||
14 | return 0; | ||
15 | } | ||
16 | |||
17 | char *alias_lookup(const char *alias) | ||
18 | { | ||
19 | alias_key = alias; | ||
20 | alias_val = NULL; | ||
21 | perf_config(alias_lookup_cb, NULL); | ||
22 | return alias_val; | ||
23 | } | ||
24 | |||
/*
 * Split cmdline in place into whitespace-separated words, honouring
 * single quotes, double quotes and backslash escapes (a backslash is
 * literal inside single quotes).
 *
 * On success *argv points at a malloc()ed array whose entries point
 * into cmdline, and the number of words is returned.  On failure the
 * array is freed, *argv is set to NULL and the value of error() is
 * returned.
 */
int split_cmdline(char *cmdline, const char ***argv)
{
	int src, dst, count = 0, size = 16;
	char quoted = 0;

	*argv = malloc(sizeof(char*) * size);
	if (!*argv)
		return error("out of memory");

	/* split alias_string */
	(*argv)[count++] = cmdline;
	for (src = dst = 0; cmdline[src];) {
		char c = cmdline[src];

		/* <ctype.h> wants an unsigned char value, hence the casts. */
		if (!quoted && isspace((unsigned char)c)) {
			cmdline[dst++] = 0;
			while (cmdline[++src]
					&& isspace((unsigned char)cmdline[src]))
				; /* skip */
			if (count >= size) {
				const char **grown;

				size += 16;
				/* Keep the old pointer until realloc() has succeeded. */
				grown = realloc(*argv, sizeof(char*) * size);
				if (!grown) {
					free(*argv);
					*argv = NULL;
					return error("out of memory");
				}
				*argv = grown;
			}
			(*argv)[count++] = cmdline + dst;
		} else if (!quoted && (c == '\'' || c == '"')) {
			quoted = c;
			src++;
		} else if (c == quoted) {
			quoted = 0;
			src++;
		} else {
			if (c == '\\' && quoted != '\'') {
				src++;
				c = cmdline[src];
				if (!c) {
					free(*argv);
					*argv = NULL;
					return error("cmdline ends with \\");
				}
			}
			cmdline[dst++] = c;
			src++;
		}
	}

	cmdline[dst] = 0;

	if (quoted) {
		free(*argv);
		*argv = NULL;
		return error("unclosed quote");
	}

	return count;
}
77 | |||
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h new file mode 100644 index 000000000000..393d6146d13b --- /dev/null +++ b/tools/perf/util/cache.h | |||
@@ -0,0 +1,119 @@ | |||
1 | #ifndef CACHE_H | ||
2 | #define CACHE_H | ||
3 | |||
4 | #include "util.h" | ||
5 | #include "strbuf.h" | ||
6 | |||
7 | #define PERF_DIR_ENVIRONMENT "PERF_DIR" | ||
8 | #define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" | ||
9 | #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" | ||
10 | #define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" | ||
11 | #define INDEX_ENVIRONMENT "PERF_INDEX_FILE" | ||
12 | #define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" | ||
13 | #define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" | ||
14 | #define CONFIG_ENVIRONMENT "PERF_CONFIG" | ||
15 | #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" | ||
16 | #define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" | ||
17 | #define PERFATTRIBUTES_FILE ".perfattributes" | ||
18 | #define INFOATTRIBUTES_FILE "info/attributes" | ||
19 | #define ATTRIBUTE_MACRO_PREFIX "[attr]" | ||
20 | |||
21 | typedef int (*config_fn_t)(const char *, const char *, void *); | ||
22 | extern int perf_default_config(const char *, const char *, void *); | ||
23 | extern int perf_config_from_file(config_fn_t fn, const char *, void *); | ||
24 | extern int perf_config(config_fn_t fn, void *); | ||
25 | extern int perf_parse_ulong(const char *, unsigned long *); | ||
26 | extern int perf_config_int(const char *, const char *); | ||
27 | extern unsigned long perf_config_ulong(const char *, const char *); | ||
28 | extern int perf_config_bool_or_int(const char *, const char *, int *); | ||
29 | extern int perf_config_bool(const char *, const char *); | ||
30 | extern int perf_config_string(const char **, const char *, const char *); | ||
31 | extern int perf_config_set(const char *, const char *); | ||
32 | extern int perf_config_set_multivar(const char *, const char *, const char *, int); | ||
33 | extern int perf_config_rename_section(const char *, const char *); | ||
34 | extern const char *perf_etc_perfconfig(void); | ||
35 | extern int check_repository_format_version(const char *var, const char *value, void *cb); | ||
36 | extern int perf_config_system(void); | ||
37 | extern int perf_config_global(void); | ||
38 | extern int config_error_nonbool(const char *); | ||
39 | extern const char *config_exclusive_filename; | ||
40 | |||
41 | #define MAX_PERFNAME (1000) | ||
42 | extern char perf_default_email[MAX_PERFNAME]; | ||
43 | extern char perf_default_name[MAX_PERFNAME]; | ||
44 | extern int user_ident_explicitly_given; | ||
45 | |||
46 | extern const char *perf_log_output_encoding; | ||
47 | extern const char *perf_mailmap_file; | ||
48 | |||
49 | /* IO helper functions */ | ||
50 | extern void maybe_flush_or_die(FILE *, const char *); | ||
51 | extern int copy_fd(int ifd, int ofd); | ||
52 | extern int copy_file(const char *dst, const char *src, int mode); | ||
53 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
54 | extern ssize_t write_in_full(int fd, const void *buf, size_t count); | ||
55 | extern void write_or_die(int fd, const void *buf, size_t count); | ||
56 | extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); | ||
57 | extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); | ||
58 | extern void fsync_or_die(int fd, const char *); | ||
59 | |||
60 | /* pager.c */ | ||
61 | extern void setup_pager(void); | ||
62 | extern const char *pager_program; | ||
63 | extern int pager_in_use(void); | ||
64 | extern int pager_use_color; | ||
65 | |||
66 | extern const char *editor_program; | ||
67 | extern const char *excludes_file; | ||
68 | |||
69 | char *alias_lookup(const char *alias); | ||
70 | int split_cmdline(char *cmdline, const char ***argv); | ||
71 | |||
72 | #define alloc_nr(x) (((x)+16)*3/2) | ||
73 | |||
/*
 * Grow the buffer held in variable 'x' so that it has room for at
 * least 'nr' entries.  'alloc' is the variable holding the current
 * capacity; it is updated to the new capacity, which is the larger of
 * 'nr' and alloc_nr(alloc).  Nothing happens when 'nr' already fits.
 *
 * 'x' and 'alloc' are evaluated several times: DO NOT USE any
 * expression with side-effect for them.
 */
#define ALLOC_GROW(x, nr, alloc) \
	do { \
		if ((nr) > alloc) { \
			if (alloc_nr(alloc) < (nr)) { \
				alloc = (nr); \
			} else { \
				alloc = alloc_nr(alloc); \
			} \
			x = xrealloc((x), alloc * sizeof(*(x))); \
		} \
	} while(0)
91 | |||
92 | |||
/* Return 1 when path begins with '/', 0 otherwise (including for ""). */
static inline int is_absolute_path(const char *path)
{
	return *path == '/';
}
97 | |||
98 | const char *make_absolute_path(const char *path); | ||
99 | const char *make_nonrelative_path(const char *path); | ||
100 | const char *make_relative_path(const char *abs, const char *base); | ||
101 | int normalize_path_copy(char *dst, const char *src); | ||
102 | int longest_ancestor_length(const char *path, const char *prefix_list); | ||
103 | char *strip_path_suffix(const char *path, const char *suffix); | ||
104 | |||
105 | extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
106 | extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); | ||
107 | /* perf_mkstemp() - create tmp file honoring TMPDIR variable */ | ||
108 | extern int perf_mkstemp(char *path, size_t len, const char *template); | ||
109 | |||
110 | extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
111 | __attribute__((format (printf, 3, 4))); | ||
112 | extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) | ||
113 | __attribute__((format (printf, 3, 4))); | ||
114 | extern char *perf_pathdup(const char *fmt, ...) | ||
115 | __attribute__((format (printf, 1, 2))); | ||
116 | |||
117 | extern size_t strlcpy(char *dest, const char *src, size_t size); | ||
118 | |||
119 | #endif /* CACHE_H */ | ||
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c new file mode 100644 index 000000000000..9a8c20ccc53e --- /dev/null +++ b/tools/perf/util/color.c | |||
@@ -0,0 +1,241 @@ | |||
1 | #include "cache.h" | ||
2 | #include "color.h" | ||
3 | |||
4 | int perf_use_color_default = -1; | ||
5 | |||
/*
 * Parse the first len bytes of name as a colour.
 *
 * Returns -1 for "normal", 0..7 for the named colours black..white
 * (case-insensitive), the number itself for a decimal value in
 * -1..255, and -2 when the word is not a colour.
 */
static int parse_color(const char *name, int len)
{
	static const char * const color_names[] = {
		"normal", "black", "red", "green", "yellow",
		"blue", "magenta", "cyan", "white"
	};
	const size_t nr_names = sizeof(color_names) / sizeof(color_names[0]);
	char *end;
	long num;
	size_t i;

	for (i = 0; i < nr_names; i++) {
		const char *str = color_names[i];

		/*
		 * Compare lengths first: indexing str[len] to test for the
		 * terminator could read past the end of the literal when len
		 * is larger than the name.
		 */
		if (strlen(str) == (size_t)len && !strncasecmp(name, str, len))
			return (int)i - 1;
	}

	/*
	 * Range-check the full long before narrowing, so an out-of-range
	 * number cannot wrap into 0..255.
	 */
	num = strtol(name, &end, 10);
	if (end - name == len && num >= -1 && num <= 255)
		return (int)num;
	return -2;
}
24 | |||
/*
 * Parse the first len bytes of name as a text attribute
 * (case-insensitive).
 *
 * Returns the attribute's numeric code (bold 1, dim 2, ul 4, blink 5,
 * reverse 7), or -1 when the word is not a known attribute.
 */
static int parse_attr(const char *name, int len)
{
	static const int attr_values[] = { 1, 2, 4, 5, 7 };
	static const char * const attr_names[] = {
		"bold", "dim", "ul", "blink", "reverse"
	};
	const size_t nr_names = sizeof(attr_names) / sizeof(attr_names[0]);
	size_t i;

	for (i = 0; i < nr_names; i++) {
		const char *str = attr_names[i];

		/*
		 * Compare lengths first: indexing str[len] to test for the
		 * terminator could read past the end of the literal when len
		 * is larger than the name.
		 */
		if (strlen(str) == (size_t)len && !strncasecmp(name, str, len))
			return attr_values[i];
	}
	return -1;
}
39 | |||
/*
 * Convenience wrapper around color_parse_mem() for a NUL-terminated
 * value: the length passed on is strlen(value).
 */
void color_parse(const char *value, const char *var, char *dst)
{
	int value_len = strlen(value);

	color_parse_mem(value, value_len, var, dst);
}
44 | |||
45 | void color_parse_mem(const char *value, int value_len, const char *var, | ||
46 | char *dst) | ||
47 | { | ||
48 | const char *ptr = value; | ||
49 | int len = value_len; | ||
50 | int attr = -1; | ||
51 | int fg = -2; | ||
52 | int bg = -2; | ||
53 | |||
54 | if (!strncasecmp(value, "reset", len)) { | ||
55 | strcpy(dst, PERF_COLOR_RESET); | ||
56 | return; | ||
57 | } | ||
58 | |||
59 | /* [fg [bg]] [attr] */ | ||
60 | while (len > 0) { | ||
61 | const char *word = ptr; | ||
62 | int val, wordlen = 0; | ||
63 | |||
64 | while (len > 0 && !isspace(word[wordlen])) { | ||
65 | wordlen++; | ||
66 | len--; | ||
67 | } | ||
68 | |||
69 | ptr = word + wordlen; | ||
70 | while (len > 0 && isspace(*ptr)) { | ||
71 | ptr++; | ||
72 | len--; | ||
73 | } | ||
74 | |||
75 | val = parse_color(word, wordlen); | ||
76 | if (val >= -1) { | ||
77 | if (fg == -2) { | ||
78 | fg = val; | ||
79 | continue; | ||
80 | } | ||
81 | if (bg == -2) { | ||
82 | bg = val; | ||
83 | continue; | ||
84 | } | ||
85 | goto bad; | ||
86 | } | ||
87 | val = parse_attr(word, wordlen); | ||
88 | if (val < 0 || attr != -1) | ||
89 | goto bad; | ||
90 | attr = val; | ||
91 | } | ||
92 | |||
93 | if (attr >= 0 || fg >= 0 || bg >= 0) { | ||
94 | int sep = 0; | ||
95 | |||
96 | *dst++ = '\033'; | ||
97 | *dst++ = '['; | ||
98 | if (attr >= 0) { | ||
99 | *dst++ = '0' + attr; | ||
100 | sep++; | ||
101 | } | ||
102 | if (fg >= 0) { | ||
103 | if (sep++) | ||
104 | *dst++ = ';'; | ||
105 | if (fg < 8) { | ||
106 | *dst++ = '3'; | ||
107 | *dst++ = '0' + fg; | ||
108 | } else { | ||
109 | dst += sprintf(dst, "38;5;%d", fg); | ||
110 | } | ||
111 | } | ||
112 | if (bg >= 0) { | ||
113 | if (sep++) | ||
114 | *dst++ = ';'; | ||
115 | if (bg < 8) { | ||
116 | *dst++ = '4'; | ||
117 | *dst++ = '0' + bg; | ||
118 | } else { | ||
119 | dst += sprintf(dst, "48;5;%d", bg); | ||
120 | } | ||
121 | } | ||
122 | *dst++ = 'm'; | ||
123 | } | ||
124 | *dst = 0; | ||
125 | return; | ||
126 | bad: | ||
127 | die("bad color value '%.*s' for variable '%s'", value_len, value, var); | ||
128 | } | ||
129 | |||
130 | int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) | ||
131 | { | ||
132 | if (value) { | ||
133 | if (!strcasecmp(value, "never")) | ||
134 | return 0; | ||
135 | if (!strcasecmp(value, "always")) | ||
136 | return 1; | ||
137 | if (!strcasecmp(value, "auto")) | ||
138 | goto auto_color; | ||
139 | } | ||
140 | |||
141 | /* Missing or explicit false to turn off colorization */ | ||
142 | if (!perf_config_bool(var, value)) | ||
143 | return 0; | ||
144 | |||
145 | /* any normal truth value defaults to 'auto' */ | ||
146 | auto_color: | ||
147 | if (stdout_is_tty < 0) | ||
148 | stdout_is_tty = isatty(1); | ||
149 | if (stdout_is_tty || (pager_in_use() && pager_use_color)) { | ||
150 | char *term = getenv("TERM"); | ||
151 | if (term && strcmp(term, "dumb")) | ||
152 | return 1; | ||
153 | } | ||
154 | return 0; | ||
155 | } | ||
156 | |||
157 | int perf_color_default_config(const char *var, const char *value, void *cb) | ||
158 | { | ||
159 | if (!strcmp(var, "color.ui")) { | ||
160 | perf_use_color_default = perf_config_colorbool(var, value, -1); | ||
161 | return 0; | ||
162 | } | ||
163 | |||
164 | return perf_default_config(var, value, cb); | ||
165 | } | ||
166 | |||
167 | static int color_vfprintf(FILE *fp, const char *color, const char *fmt, | ||
168 | va_list args, const char *trail) | ||
169 | { | ||
170 | int r = 0; | ||
171 | |||
172 | /* | ||
173 | * Auto-detect: | ||
174 | */ | ||
175 | if (perf_use_color_default < 0) { | ||
176 | if (isatty(1) || pager_in_use()) | ||
177 | perf_use_color_default = 1; | ||
178 | else | ||
179 | perf_use_color_default = 0; | ||
180 | } | ||
181 | |||
182 | if (perf_use_color_default && *color) | ||
183 | r += fprintf(fp, "%s", color); | ||
184 | r += vfprintf(fp, fmt, args); | ||
185 | if (perf_use_color_default && *color) | ||
186 | r += fprintf(fp, "%s", PERF_COLOR_RESET); | ||
187 | if (trail) | ||
188 | r += fprintf(fp, "%s", trail); | ||
189 | return r; | ||
190 | } | ||
191 | |||
192 | |||
193 | |||
/*
 * printf-style front end to color_vfprintf() with no trailing text.
 * Returns what color_vfprintf() returns.
 */
int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	written = color_vfprintf(fp, color, fmt, ap, NULL);
	va_end(ap);

	return written;
}
204 | |||
/*
 * printf-style front end to color_vfprintf() that appends a newline
 * after the (possibly coloured) message.  Returns what color_vfprintf()
 * returns.
 */
int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...)
{
	va_list ap;
	int written;

	va_start(ap, fmt);
	written = color_vfprintf(fp, color, fmt, ap, "\n");
	va_end(ap);

	return written;
}
214 | |||
215 | /* | ||
216 | * This function splits the buffer by newlines and colors the lines individually. | ||
217 | * | ||
218 | * Returns 0 on success. | ||
219 | */ | ||
220 | int color_fwrite_lines(FILE *fp, const char *color, | ||
221 | size_t count, const char *buf) | ||
222 | { | ||
223 | if (!*color) | ||
224 | return fwrite(buf, count, 1, fp) != 1; | ||
225 | while (count) { | ||
226 | char *p = memchr(buf, '\n', count); | ||
227 | if (p != buf && (fputs(color, fp) < 0 || | ||
228 | fwrite(buf, p ? p - buf : count, 1, fp) != 1 || | ||
229 | fputs(PERF_COLOR_RESET, fp) < 0)) | ||
230 | return -1; | ||
231 | if (!p) | ||
232 | return 0; | ||
233 | if (fputc('\n', fp) < 0) | ||
234 | return -1; | ||
235 | count -= p + 1 - buf; | ||
236 | buf = p + 1; | ||
237 | } | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | |||
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h new file mode 100644 index 000000000000..5abfd379582b --- /dev/null +++ b/tools/perf/util/color.h | |||
@@ -0,0 +1,36 @@ | |||
1 | #ifndef COLOR_H | ||
2 | #define COLOR_H | ||
3 | |||
4 | /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ | ||
5 | #define COLOR_MAXLEN 24 | ||
6 | |||
7 | #define PERF_COLOR_NORMAL "" | ||
8 | #define PERF_COLOR_RESET "\033[m" | ||
9 | #define PERF_COLOR_BOLD "\033[1m" | ||
10 | #define PERF_COLOR_RED "\033[31m" | ||
11 | #define PERF_COLOR_GREEN "\033[32m" | ||
12 | #define PERF_COLOR_YELLOW "\033[33m" | ||
13 | #define PERF_COLOR_BLUE "\033[34m" | ||
14 | #define PERF_COLOR_MAGENTA "\033[35m" | ||
15 | #define PERF_COLOR_CYAN "\033[36m" | ||
16 | #define PERF_COLOR_BG_RED "\033[41m" | ||
17 | |||
18 | /* | ||
19 | * This variable stores the value of color.ui | ||
20 | */ | ||
21 | extern int perf_use_color_default; | ||
22 | |||
23 | |||
24 | /* | ||
25 | * Use this instead of perf_default_config if you need the value of color.ui. | ||
26 | */ | ||
27 | int perf_color_default_config(const char *var, const char *value, void *cb); | ||
28 | |||
29 | int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); | ||
30 | void color_parse(const char *value, const char *var, char *dst); | ||
31 | void color_parse_mem(const char *value, int len, const char *var, char *dst); | ||
32 | int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); | ||
33 | int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); | ||
34 | int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); | ||
35 | |||
36 | #endif /* COLOR_H */ | ||
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c new file mode 100644 index 000000000000..3dd13faa6a27 --- /dev/null +++ b/tools/perf/util/config.c | |||
@@ -0,0 +1,873 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | * Copyright (C) Johannes Schindelin, 2005 | ||
6 | * | ||
7 | */ | ||
8 | #include "util.h" | ||
9 | #include "cache.h" | ||
10 | #include "exec_cmd.h" | ||
11 | |||
12 | #define MAXNAME (256) | ||
13 | |||
14 | static FILE *config_file; | ||
15 | static const char *config_file_name; | ||
16 | static int config_linenr; | ||
17 | static int config_file_eof; | ||
18 | |||
19 | const char *config_exclusive_filename = NULL; | ||
20 | |||
21 | static int get_next_char(void) | ||
22 | { | ||
23 | int c; | ||
24 | FILE *f; | ||
25 | |||
26 | c = '\n'; | ||
27 | if ((f = config_file) != NULL) { | ||
28 | c = fgetc(f); | ||
29 | if (c == '\r') { | ||
30 | /* DOS like systems */ | ||
31 | c = fgetc(f); | ||
32 | if (c != '\n') { | ||
33 | ungetc(c, f); | ||
34 | c = '\r'; | ||
35 | } | ||
36 | } | ||
37 | if (c == '\n') | ||
38 | config_linenr++; | ||
39 | if (c == EOF) { | ||
40 | config_file_eof = 1; | ||
41 | c = '\n'; | ||
42 | } | ||
43 | } | ||
44 | return c; | ||
45 | } | ||
46 | |||
/*
 * Read the value part of a "name = value" line, up to the end of the
 * line, into a static buffer.
 *
 * Outside double quotes, runs of whitespace collapse to one space
 * (leading and trailing whitespace is dropped) and ';' or '#' starts a
 * comment that runs to the end of the line.  Backslash escapes \t, \b,
 * \n, \\ and \" are translated, and a backslash directly before the
 * newline continues the value on the next line.
 *
 * Returns the buffer, or NULL when the value is too long, a quote is
 * left open at the end of the line, or an unknown escape is seen.
 */
static char *parse_value(void)
{
	static char value[1024];
	int in_quote = 0, in_comment = 0, pending_space = 0;
	int len = 0;

	while (1) {
		int c = get_next_char();

		if (len >= sizeof(value) - 1)
			return NULL;

		if (c == '\n') {
			if (in_quote)
				return NULL;
			value[len] = 0;
			return value;
		}
		if (in_comment)
			continue;

		if (!in_quote) {
			if (isspace(c)) {
				/* Remember it; emitted only if more text follows. */
				pending_space = 1;
				continue;
			}
			if (c == ';' || c == '#') {
				in_comment = 1;
				continue;
			}
		}

		if (pending_space) {
			if (len)
				value[len++] = ' ';
			pending_space = 0;
		}

		if (c == '\\') {
			c = get_next_char();
			switch (c) {
			case '\n':
				/* line continuation */
				continue;
			case 't':
				c = '\t';
				break;
			case 'b':
				c = '\b';
				break;
			case 'n':
				c = '\n';
				break;
			/* Some characters escape as themselves */
			case '\\':
			case '"':
				break;
			/* Reject unknown escape sequences */
			default:
				return NULL;
			}
			value[len++] = c;
			continue;
		}

		if (c == '"') {
			in_quote = 1 - in_quote;
			continue;
		}
		value[len++] = c;
	}
}
110 | |||
/* Return 1 when c may appear in a config key (alphanumeric or '-'), else 0. */
static inline int iskeychar(int c)
{
	if (c == '-')
		return 1;
	return isalnum(c) != 0;
}
115 | |||
116 | static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) | ||
117 | { | ||
118 | int c; | ||
119 | char *value; | ||
120 | |||
121 | /* Get the full name */ | ||
122 | for (;;) { | ||
123 | c = get_next_char(); | ||
124 | if (config_file_eof) | ||
125 | break; | ||
126 | if (!iskeychar(c)) | ||
127 | break; | ||
128 | name[len++] = tolower(c); | ||
129 | if (len >= MAXNAME) | ||
130 | return -1; | ||
131 | } | ||
132 | name[len] = 0; | ||
133 | while (c == ' ' || c == '\t') | ||
134 | c = get_next_char(); | ||
135 | |||
136 | value = NULL; | ||
137 | if (c != '\n') { | ||
138 | if (c != '=') | ||
139 | return -1; | ||
140 | value = parse_value(); | ||
141 | if (!value) | ||
142 | return -1; | ||
143 | } | ||
144 | return fn(name, value, data); | ||
145 | } | ||
146 | |||
147 | static int get_extended_base_var(char *name, int baselen, int c) | ||
148 | { | ||
149 | do { | ||
150 | if (c == '\n') | ||
151 | return -1; | ||
152 | c = get_next_char(); | ||
153 | } while (isspace(c)); | ||
154 | |||
155 | /* We require the format to be '[base "extension"]' */ | ||
156 | if (c != '"') | ||
157 | return -1; | ||
158 | name[baselen++] = '.'; | ||
159 | |||
160 | for (;;) { | ||
161 | int c = get_next_char(); | ||
162 | if (c == '\n') | ||
163 | return -1; | ||
164 | if (c == '"') | ||
165 | break; | ||
166 | if (c == '\\') { | ||
167 | c = get_next_char(); | ||
168 | if (c == '\n') | ||
169 | return -1; | ||
170 | } | ||
171 | name[baselen++] = c; | ||
172 | if (baselen > MAXNAME / 2) | ||
173 | return -1; | ||
174 | } | ||
175 | |||
176 | /* Final ']' */ | ||
177 | if (get_next_char() != ']') | ||
178 | return -1; | ||
179 | return baselen; | ||
180 | } | ||
181 | |||
182 | static int get_base_var(char *name) | ||
183 | { | ||
184 | int baselen = 0; | ||
185 | |||
186 | for (;;) { | ||
187 | int c = get_next_char(); | ||
188 | if (config_file_eof) | ||
189 | return -1; | ||
190 | if (c == ']') | ||
191 | return baselen; | ||
192 | if (isspace(c)) | ||
193 | return get_extended_base_var(name, baselen, c); | ||
194 | if (!iskeychar(c) && c != '.') | ||
195 | return -1; | ||
196 | if (baselen > MAXNAME / 2) | ||
197 | return -1; | ||
198 | name[baselen++] = tolower(c); | ||
199 | } | ||
200 | } | ||
201 | |||
202 | static int perf_parse_file(config_fn_t fn, void *data) | ||
203 | { | ||
204 | int comment = 0; | ||
205 | int baselen = 0; | ||
206 | static char var[MAXNAME]; | ||
207 | |||
208 | /* U+FEFF Byte Order Mark in UTF8 */ | ||
209 | static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; | ||
210 | const unsigned char *bomptr = utf8_bom; | ||
211 | |||
212 | for (;;) { | ||
213 | int c = get_next_char(); | ||
214 | if (bomptr && *bomptr) { | ||
215 | /* We are at the file beginning; skip UTF8-encoded BOM | ||
216 | * if present. Sane editors won't put this in on their | ||
217 | * own, but e.g. Windows Notepad will do it happily. */ | ||
218 | if ((unsigned char) c == *bomptr) { | ||
219 | bomptr++; | ||
220 | continue; | ||
221 | } else { | ||
222 | /* Do not tolerate partial BOM. */ | ||
223 | if (bomptr != utf8_bom) | ||
224 | break; | ||
225 | /* No BOM at file beginning. Cool. */ | ||
226 | bomptr = NULL; | ||
227 | } | ||
228 | } | ||
229 | if (c == '\n') { | ||
230 | if (config_file_eof) | ||
231 | return 0; | ||
232 | comment = 0; | ||
233 | continue; | ||
234 | } | ||
235 | if (comment || isspace(c)) | ||
236 | continue; | ||
237 | if (c == '#' || c == ';') { | ||
238 | comment = 1; | ||
239 | continue; | ||
240 | } | ||
241 | if (c == '[') { | ||
242 | baselen = get_base_var(var); | ||
243 | if (baselen <= 0) | ||
244 | break; | ||
245 | var[baselen++] = '.'; | ||
246 | var[baselen] = 0; | ||
247 | continue; | ||
248 | } | ||
249 | if (!isalpha(c)) | ||
250 | break; | ||
251 | var[baselen] = tolower(c); | ||
252 | if (get_value(fn, data, var, baselen+1) < 0) | ||
253 | break; | ||
254 | } | ||
255 | die("bad config file line %d in %s", config_linenr, config_file_name); | ||
256 | } | ||
257 | |||
/*
 * Apply an optional unit suffix to *val.
 *
 * end points at the text that follows the number.  An empty suffix
 * leaves *val alone; "k", "m" and "g" (case-insensitive) multiply it by
 * 1024, 1024^2 and 1024^3.
 *
 * Returns 1 on success.  Returns 0, leaving *val untouched, when the
 * suffix is unknown or the multiplication would overflow unsigned long.
 */
static int parse_unit_factor(const char *end, unsigned long *val)
{
	unsigned long factor;

	if (!*end)
		return 1;

	if (!strcasecmp(end, "k"))
		factor = 1024UL;
	else if (!strcasecmp(end, "m"))
		factor = 1024UL * 1024;
	else if (!strcasecmp(end, "g"))
		factor = 1024UL * 1024 * 1024;
	else
		return 0;

	/* (unsigned long)-1 is the largest unsigned long value. */
	if (*val > (unsigned long)-1 / factor)
		return 0;

	*val *= factor;
	return 1;
}
276 | |||
/*
 * Parse a signed integer (any base strtol() accepts with base 0) followed
 * by an optional k/m/g unit suffix.
 *
 * Returns 1 and stores the result in *ret on success.  Returns 0, leaving
 * *ret untouched, when value is NULL or empty, contains no digits (a bare
 * "k" used to be accepted as 0), has an unknown suffix, or does not fit in
 * a long either before or after scaling.
 */
static int perf_parse_long(const char *value, long *ret)
{
	char *end;
	long val;
	unsigned long factor = 1;
	unsigned long magnitude, limit;

	if (!value || !*value)
		return 0;

	errno = 0;
	val = strtol(value, &end, 0);
	if (end == value || errno == ERANGE)
		return 0;
	if (!parse_unit_factor(end, &factor))
		return 0;

	/*
	 * Do the range check on magnitudes in unsigned arithmetic so the
	 * signed multiplication below can never overflow.  ~0UL >> 1 is the
	 * largest long; negative values may go one further.
	 */
	magnitude = val < 0 ? 0UL - (unsigned long)val : (unsigned long)val;
	limit = (~0UL >> 1) + (val < 0 ? 1UL : 0UL);
	if (magnitude > limit / factor)
		return 0;

	*ret = val * (long)factor;
	return 1;
}
290 | |||
/*
 * Parse an unsigned integer (any base strtoul() accepts with base 0)
 * followed by an optional k/m/g unit suffix.
 *
 * Returns 1 and stores the result in *ret on success.  Returns 0, leaving
 * *ret untouched, when value is NULL or empty, contains no digits, has an
 * unknown suffix, or does not fit in an unsigned long either before or
 * after scaling.
 */
int perf_parse_ulong(const char *value, unsigned long *ret)
{
	char *end;
	unsigned long val;
	unsigned long factor = 1;

	if (!value || !*value)
		return 0;

	errno = 0;
	val = strtoul(value, &end, 0);
	if (end == value || errno == ERANGE)
		return 0;
	if (!parse_unit_factor(end, &factor))
		return 0;

	/* Scale locally so overflow is caught here rather than wrapping. */
	if (val > ~0UL / factor)
		return 0;

	*ret = val * factor;
	return 1;
}
303 | |||
304 | static void die_bad_config(const char *name) | ||
305 | { | ||
306 | if (config_file_name) | ||
307 | die("bad config value for '%s' in %s", name, config_file_name); | ||
308 | die("bad config value for '%s'", name); | ||
309 | } | ||
310 | |||
/*
 * Interpret `value` as an integer setting for variable `name`.
 *
 * Dies via die_bad_config() when the text cannot be parsed by
 * perf_parse_long() or when the parsed number does not survive a round
 * trip through int (it used to be truncated silently on return).
 */
int perf_config_int(const char *name, const char *value)
{
	long ret = 0;

	if (!perf_parse_long(value, &ret) || (long)(int)ret != ret)
		die_bad_config(name);
	return (int)ret;
}
318 | |||
/*
 * Interpret `value` as an unsigned long setting for variable `name`;
 * calls die_bad_config() when perf_parse_ulong() rejects the text.
 */
unsigned long perf_config_ulong(const char *name, const char *value)
{
	unsigned long parsed = 0;
	int ok = perf_parse_ulong(value, &parsed);

	if (!ok)
		die_bad_config(name);
	return parsed;
}
326 | |||
/*
 * Interpret `value` as either a boolean or an integer.
 *
 * *is_bool is set to 1 when the value was treated as a boolean: a NULL
 * value yields 1, an empty string yields 0, "true"/"yes"/"on" yield 1 and
 * "false"/"no"/"off" yield 0 (case-insensitively).  Anything else sets
 * *is_bool to 0 and is handed to perf_config_int().
 */
int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
{
	static const char *const truthy[] = { "true", "yes", "on" };
	static const char *const falsy[] = { "false", "no", "off" };
	size_t k;

	*is_bool = 1;
	if (value == NULL)
		return 1;
	if (*value == '\0')
		return 0;

	for (k = 0; k < sizeof(truthy) / sizeof(truthy[0]); k++)
		if (strcasecmp(value, truthy[k]) == 0)
			return 1;
	for (k = 0; k < sizeof(falsy) / sizeof(falsy[0]); k++)
		if (strcasecmp(value, falsy[k]) == 0)
			return 0;

	*is_bool = 0;
	return perf_config_int(name, value);
}
341 | |||
/*
 * Interpret `value` as a boolean: the result of perf_config_bool_or_int()
 * collapsed to 0 or 1 (so any non-zero integer counts as true).
 */
int perf_config_bool(const char *name, const char *value)
{
	int was_bool;
	int parsed = perf_config_bool_or_int(name, value, &was_bool);

	return parsed != 0;
}
347 | |||
/*
 * Store a heap-allocated copy of `value` in *dest for variable `var`.
 *
 * Returns 0 on success; the caller owns the copy.  Returns the result of
 * config_error_nonbool() when the variable has no value, and the result of
 * error() when the copy cannot be allocated (previously *dest was set to
 * NULL and success was reported).  *dest is left untouched on failure.
 */
int perf_config_string(const char **dest, const char *var, const char *value)
{
	char *copy;

	if (!value)
		return config_error_nonbool(var);

	copy = strdup(value);
	if (!copy)
		return error("out of memory copying value of '%s'", var);

	*dest = copy;
	return 0;
}
355 | |||
/*
 * Handler for "core." variables, called from perf_default_config().
 * No variables are recognised yet: both arguments are ignored and 0 is
 * returned unconditionally.
 */
static int perf_default_core_config(const char *var, const char *value)
{
	/* Add other config variables here and to Documentation/config.txt. */
	return 0;
}
361 | |||
/*
 * Default config callback: variables whose name starts with "core." are
 * forwarded to perf_default_core_config(); everything else is accepted
 * and ignored (returns 0).  `dummy` is unused.
 */
int perf_default_config(const char *var, const char *value, void *dummy)
{
	/* Add other config variables here and to Documentation/config.txt. */
	if (prefixcmp(var, "core."))
		return 0;

	return perf_default_core_config(var, value);
}
370 | |||
371 | int perf_config_from_file(config_fn_t fn, const char *filename, void *data) | ||
372 | { | ||
373 | int ret; | ||
374 | FILE *f = fopen(filename, "r"); | ||
375 | |||
376 | ret = -1; | ||
377 | if (f) { | ||
378 | config_file = f; | ||
379 | config_file_name = filename; | ||
380 | config_linenr = 1; | ||
381 | config_file_eof = 0; | ||
382 | ret = perf_parse_file(fn, data); | ||
383 | fclose(f); | ||
384 | config_file_name = NULL; | ||
385 | } | ||
386 | return ret; | ||
387 | } | ||
388 | |||
389 | const char *perf_etc_perfconfig(void) | ||
390 | { | ||
391 | static const char *system_wide; | ||
392 | if (!system_wide) | ||
393 | system_wide = system_path(ETC_PERFCONFIG); | ||
394 | return system_wide; | ||
395 | } | ||
396 | |||
/*
 * Read environment variable `k` as a boolean via perf_config_bool();
 * returns `def` when the variable is not set.
 */
static int perf_env_bool(const char *k, int def)
{
	const char *setting = getenv(k);

	if (setting == NULL)
		return def;
	return perf_config_bool(k, setting);
}
402 | |||
/*
 * Returns 1 unless the environment variable PERF_CONFIG_NOSYSTEM is set
 * to a true value, in which case it returns 0.
 */
int perf_config_system(void)
{
	int disabled = perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);

	return disabled ? 0 : 1;
}
407 | |||
/*
 * Returns 1 unless the environment variable PERF_CONFIG_NOGLOBAL is set
 * to a true value, in which case it returns 0.
 */
int perf_config_global(void)
{
	int disabled = perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);

	return disabled ? 0 : 1;
}
412 | |||
413 | int perf_config(config_fn_t fn, void *data) | ||
414 | { | ||
415 | int ret = 0, found = 0; | ||
416 | char *repo_config = NULL; | ||
417 | const char *home = NULL; | ||
418 | |||
419 | /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ | ||
420 | if (config_exclusive_filename) | ||
421 | return perf_config_from_file(fn, config_exclusive_filename, data); | ||
422 | if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { | ||
423 | ret += perf_config_from_file(fn, perf_etc_perfconfig(), | ||
424 | data); | ||
425 | found += 1; | ||
426 | } | ||
427 | |||
428 | home = getenv("HOME"); | ||
429 | if (perf_config_global() && home) { | ||
430 | char *user_config = strdup(mkpath("%s/.perfconfig", home)); | ||
431 | if (!access(user_config, R_OK)) { | ||
432 | ret += perf_config_from_file(fn, user_config, data); | ||
433 | found += 1; | ||
434 | } | ||
435 | free(user_config); | ||
436 | } | ||
437 | |||
438 | repo_config = perf_pathdup("config"); | ||
439 | if (!access(repo_config, R_OK)) { | ||
440 | ret += perf_config_from_file(fn, repo_config, data); | ||
441 | found += 1; | ||
442 | } | ||
443 | free(repo_config); | ||
444 | if (found == 0) | ||
445 | return -1; | ||
446 | return ret; | ||
447 | } | ||
448 | |||
449 | /* | ||
450 | * Find all the stuff for perf_config_set() below. | ||
451 | */ | ||
452 | |||
/* Capacity of store.offset[]: the most matches store_aux() will record. */
#define MAX_MATCHES 512

/*
 * Search state shared between perf_config_set_multivar(), which fills it
 * in, and store_aux()/matches(), which consult and update it while the
 * existing config file is being parsed.
 */
static struct {
	/* length of the section part of the key (offset of its last '.') */
	int baselen;
	/* the key being looked for, as prepared by perf_config_set_multivar() */
	char* key;
	/* 1 when the caller's pattern started with '!': invert the regex test */
	int do_not_match;
	/* compiled value pattern, or NULL to match any value */
	regex_t* value_regex;
	/* caller's flag; when 0, store_aux() warns on a second match */
	int multi_replace;
	/* file positions (from ftell()) recorded by store_aux() */
	size_t offset[MAX_MATCHES];
	/* how far the scan has progressed relative to the wanted section/key */
	enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state;
	/* number of matching entries recorded so far */
	int seen;
} store;
465 | |||
466 | static int matches(const char* key, const char* value) | ||
467 | { | ||
468 | return !strcmp(key, store.key) && | ||
469 | (store.value_regex == NULL || | ||
470 | (store.do_not_match ^ | ||
471 | !regexec(store.value_regex, value, 0, NULL, 0))); | ||
472 | } | ||
473 | |||
/*
 * Config-parser callback used by perf_config_set_multivar() to locate the
 * entry (or section) that is about to be rewritten.
 *
 * For every key/value pair in the file it advances store.state and records
 * the current position of config_file (via ftell()) in store.offset[].
 * `cb` is unused.
 *
 * Returns 0 to continue parsing, or 1 once more than MAX_MATCHES matching
 * entries have been found.
 */
static int store_aux(const char* key, const char* value, void *cb)
{
	const char *ep;
	size_t section_len;

	switch (store.state) {
	case KEY_SEEN:
		/* A match was already recorded; this may be a further one. */
		if (matches(key, value)) {
			if (store.seen == 1 && store.multi_replace == 0) {
				warning("%s has multiple values", key);
			} else if (store.seen >= MAX_MATCHES) {
				error("too many matches for %s", key);
				return 1;
			}

			store.offset[store.seen] = ftell(config_file);
			store.seen++;
		}
		break;
	case SECTION_SEEN:
		/*
		 * What we are looking for is in store.key (both
		 * section and var), and its section part is baselen
		 * long.  We found key (again, both section and var).
		 * We would want to know if this key is in the same
		 * section as what we are looking for.  We already
		 * know we are in the same section as what should
		 * hold store.key.
		 */
		ep = strrchr(key, '.');
		section_len = ep - key;

		if ((section_len != store.baselen) ||
		    memcmp(key, store.key, section_len+1)) {
			/* This key belongs to a different section. */
			store.state = SECTION_END_SEEN;
			break;
		}

		/*
		 * Do not increment matches: this is no match, but we
		 * just made sure we are in the desired section.
		 */
		store.offset[store.seen] = ftell(config_file);
		/* fallthru */
	case SECTION_END_SEEN:
	case START:
		if (matches(key, value)) {
			store.offset[store.seen] = ftell(config_file);
			store.state = KEY_SEEN;
			store.seen++;
		} else {
			/* Not the key itself, but is it in the wanted section? */
			if (strrchr(key, '.') - key == store.baselen &&
			      !strncmp(key, store.key, store.baselen)) {
					store.state = SECTION_SEEN;
					store.offset[store.seen] = ftell(config_file);
			}
		}
	}
	return 0;
}
534 | |||
535 | static int store_write_section(int fd, const char* key) | ||
536 | { | ||
537 | const char *dot; | ||
538 | int i, success; | ||
539 | struct strbuf sb = STRBUF_INIT; | ||
540 | |||
541 | dot = memchr(key, '.', store.baselen); | ||
542 | if (dot) { | ||
543 | strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); | ||
544 | for (i = dot - key + 1; i < store.baselen; i++) { | ||
545 | if (key[i] == '"' || key[i] == '\\') | ||
546 | strbuf_addch(&sb, '\\'); | ||
547 | strbuf_addch(&sb, key[i]); | ||
548 | } | ||
549 | strbuf_addstr(&sb, "\"]\n"); | ||
550 | } else { | ||
551 | strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); | ||
552 | } | ||
553 | |||
554 | success = write_in_full(fd, sb.buf, sb.len) == sb.len; | ||
555 | strbuf_release(&sb); | ||
556 | |||
557 | return success; | ||
558 | } | ||
559 | |||
560 | static int store_write_pair(int fd, const char* key, const char* value) | ||
561 | { | ||
562 | int i, success; | ||
563 | int length = strlen(key + store.baselen + 1); | ||
564 | const char *quote = ""; | ||
565 | struct strbuf sb = STRBUF_INIT; | ||
566 | |||
567 | /* | ||
568 | * Check to see if the value needs to be surrounded with a dq pair. | ||
569 | * Note that problematic characters are always backslash-quoted; this | ||
570 | * check is about not losing leading or trailing SP and strings that | ||
571 | * follow beginning-of-comment characters (i.e. ';' and '#') by the | ||
572 | * configuration parser. | ||
573 | */ | ||
574 | if (value[0] == ' ') | ||
575 | quote = "\""; | ||
576 | for (i = 0; value[i]; i++) | ||
577 | if (value[i] == ';' || value[i] == '#') | ||
578 | quote = "\""; | ||
579 | if (i && value[i - 1] == ' ') | ||
580 | quote = "\""; | ||
581 | |||
582 | strbuf_addf(&sb, "\t%.*s = %s", | ||
583 | length, key + store.baselen + 1, quote); | ||
584 | |||
585 | for (i = 0; value[i]; i++) | ||
586 | switch (value[i]) { | ||
587 | case '\n': | ||
588 | strbuf_addstr(&sb, "\\n"); | ||
589 | break; | ||
590 | case '\t': | ||
591 | strbuf_addstr(&sb, "\\t"); | ||
592 | break; | ||
593 | case '"': | ||
594 | case '\\': | ||
595 | strbuf_addch(&sb, '\\'); | ||
596 | default: | ||
597 | strbuf_addch(&sb, value[i]); | ||
598 | break; | ||
599 | } | ||
600 | strbuf_addf(&sb, "%s\n", quote); | ||
601 | |||
602 | success = write_in_full(fd, sb.buf, sb.len) == sb.len; | ||
603 | strbuf_release(&sb); | ||
604 | |||
605 | return success; | ||
606 | } | ||
607 | |||
/*
 * Scan backwards through `contents` from offset_ - 2 to find where the
 * logical line containing that position begins.
 *
 * The scan stops at a newline or at offset 0; when the newline is preceded
 * by a backslash the scan continues into the previous physical line.  The
 * last '=' and ']' passed on the way are remembered: if a ']' lies before
 * the '=', *found_bracket is set to 1 and the position just after the ']'
 * is returned, otherwise the position after the stopping point is
 * returned.  `size` is only used as the "not found" marker for those two
 * positions.
 */
static ssize_t find_beginning_of_line(const char* contents, size_t size,
	size_t offset_, int* found_bracket)
{
	size_t equal_offset = size;
	size_t bracket_offset = size;
	ssize_t offset;

	for (;;) {
		offset = offset_ - 2;
		while (offset > 0 && contents[offset] != '\n') {
			if (contents[offset] == '=')
				equal_offset = offset;
			else if (contents[offset] == ']')
				bracket_offset = offset;
			offset--;
		}

		/* Stop unless the newline we hit is an escaped continuation. */
		if (offset <= 0 || contents[offset - 1] != '\\')
			break;
		offset_ = offset;
	}

	if (bracket_offset < equal_offset) {
		*found_bracket = 1;
		return bracket_offset + 1;
	}

	return offset + 1;
}
633 | |||
/*
 * Set `key` to `value` in the config file: perf_config_set_multivar()
 * with no value pattern and multi_replace disabled.  Returns what that
 * call returns.
 */
int perf_config_set(const char* key, const char* value)
{
	return perf_config_set_multivar(key, value, NULL, 0);
}
638 | |||
639 | /* | ||
640 | * If value==NULL, unset in (remove from) config, | ||
641 | * if value_regex!=NULL, disregard key/value pairs where value does not match. | ||
642 | * if multi_replace==0, nothing, or only one matching key/value is replaced, | ||
643 | * else all matching key/values (regardless how many) are removed, | ||
644 | * before the new pair is written. | ||
645 | * | ||
646 | * Returns 0 on success. | ||
647 | * | ||
648 | * This function does this: | ||
649 | * | ||
650 | * - it locks the config file by creating ".perf/config.lock" | ||
651 | * | ||
652 | * - it then parses the config using store_aux() as validator to find | ||
653 | * the position on the key/value pair to replace. If it is to be unset, | ||
654 | * it must be found exactly once. | ||
655 | * | ||
656 | * - the config file is mmap()ed and the part before the match (if any) is | ||
657 | * written to the lock file, then the changed part and the rest. | ||
658 | * | ||
659 | * - the config file is removed and the lock file rename()d to it. | ||
660 | * | ||
661 | */ | ||
662 | int perf_config_set_multivar(const char* key, const char* value, | ||
663 | const char* value_regex, int multi_replace) | ||
664 | { | ||
665 | int i, dot; | ||
666 | int fd = -1, in_fd; | ||
667 | int ret = 0; | ||
668 | char* config_filename; | ||
669 | const char* last_dot = strrchr(key, '.'); | ||
670 | |||
671 | if (config_exclusive_filename) | ||
672 | config_filename = strdup(config_exclusive_filename); | ||
673 | else | ||
674 | config_filename = perf_pathdup("config"); | ||
675 | |||
676 | /* | ||
677 | * Since "key" actually contains the section name and the real | ||
678 | * key name separated by a dot, we have to know where the dot is. | ||
679 | */ | ||
680 | |||
681 | if (last_dot == NULL) { | ||
682 | error("key does not contain a section: %s", key); | ||
683 | ret = 2; | ||
684 | goto out_free; | ||
685 | } | ||
686 | store.baselen = last_dot - key; | ||
687 | |||
688 | store.multi_replace = multi_replace; | ||
689 | |||
690 | /* | ||
691 | * Validate the key and while at it, lower case it for matching. | ||
692 | */ | ||
693 | store.key = malloc(strlen(key) + 1); | ||
694 | dot = 0; | ||
695 | for (i = 0; key[i]; i++) { | ||
696 | unsigned char c = key[i]; | ||
697 | if (c == '.') | ||
698 | dot = 1; | ||
699 | /* Leave the extended basename untouched.. */ | ||
700 | if (!dot || i > store.baselen) { | ||
701 | if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { | ||
702 | error("invalid key: %s", key); | ||
703 | free(store.key); | ||
704 | ret = 1; | ||
705 | goto out_free; | ||
706 | } | ||
707 | c = tolower(c); | ||
708 | } else if (c == '\n') { | ||
709 | error("invalid key (newline): %s", key); | ||
710 | free(store.key); | ||
711 | ret = 1; | ||
712 | goto out_free; | ||
713 | } | ||
714 | store.key[i] = c; | ||
715 | } | ||
716 | store.key[i] = 0; | ||
717 | |||
718 | /* | ||
719 | * If .perf/config does not exist yet, write a minimal version. | ||
720 | */ | ||
721 | in_fd = open(config_filename, O_RDONLY); | ||
722 | if ( in_fd < 0 ) { | ||
723 | free(store.key); | ||
724 | |||
725 | if ( ENOENT != errno ) { | ||
726 | error("opening %s: %s", config_filename, | ||
727 | strerror(errno)); | ||
728 | ret = 3; /* same as "invalid config file" */ | ||
729 | goto out_free; | ||
730 | } | ||
731 | /* if nothing to unset, error out */ | ||
732 | if (value == NULL) { | ||
733 | ret = 5; | ||
734 | goto out_free; | ||
735 | } | ||
736 | |||
737 | store.key = (char*)key; | ||
738 | if (!store_write_section(fd, key) || | ||
739 | !store_write_pair(fd, key, value)) | ||
740 | goto write_err_out; | ||
741 | } else { | ||
742 | struct stat st; | ||
743 | char* contents; | ||
744 | size_t contents_sz, copy_begin, copy_end; | ||
745 | int i, new_line = 0; | ||
746 | |||
747 | if (value_regex == NULL) | ||
748 | store.value_regex = NULL; | ||
749 | else { | ||
750 | if (value_regex[0] == '!') { | ||
751 | store.do_not_match = 1; | ||
752 | value_regex++; | ||
753 | } else | ||
754 | store.do_not_match = 0; | ||
755 | |||
756 | store.value_regex = (regex_t*)malloc(sizeof(regex_t)); | ||
757 | if (regcomp(store.value_regex, value_regex, | ||
758 | REG_EXTENDED)) { | ||
759 | error("invalid pattern: %s", value_regex); | ||
760 | free(store.value_regex); | ||
761 | ret = 6; | ||
762 | goto out_free; | ||
763 | } | ||
764 | } | ||
765 | |||
766 | store.offset[0] = 0; | ||
767 | store.state = START; | ||
768 | store.seen = 0; | ||
769 | |||
770 | /* | ||
771 | * After this, store.offset will contain the *end* offset | ||
772 | * of the last match, or remain at 0 if no match was found. | ||
773 | * As a side effect, we make sure to transform only a valid | ||
774 | * existing config file. | ||
775 | */ | ||
776 | if (perf_config_from_file(store_aux, config_filename, NULL)) { | ||
777 | error("invalid config file %s", config_filename); | ||
778 | free(store.key); | ||
779 | if (store.value_regex != NULL) { | ||
780 | regfree(store.value_regex); | ||
781 | free(store.value_regex); | ||
782 | } | ||
783 | ret = 3; | ||
784 | goto out_free; | ||
785 | } | ||
786 | |||
787 | free(store.key); | ||
788 | if (store.value_regex != NULL) { | ||
789 | regfree(store.value_regex); | ||
790 | free(store.value_regex); | ||
791 | } | ||
792 | |||
793 | /* if nothing to unset, or too many matches, error out */ | ||
794 | if ((store.seen == 0 && value == NULL) || | ||
795 | (store.seen > 1 && multi_replace == 0)) { | ||
796 | ret = 5; | ||
797 | goto out_free; | ||
798 | } | ||
799 | |||
800 | fstat(in_fd, &st); | ||
801 | contents_sz = xsize_t(st.st_size); | ||
802 | contents = mmap(NULL, contents_sz, PROT_READ, | ||
803 | MAP_PRIVATE, in_fd, 0); | ||
804 | close(in_fd); | ||
805 | |||
806 | if (store.seen == 0) | ||
807 | store.seen = 1; | ||
808 | |||
809 | for (i = 0, copy_begin = 0; i < store.seen; i++) { | ||
810 | if (store.offset[i] == 0) { | ||
811 | store.offset[i] = copy_end = contents_sz; | ||
812 | } else if (store.state != KEY_SEEN) { | ||
813 | copy_end = store.offset[i]; | ||
814 | } else | ||
815 | copy_end = find_beginning_of_line( | ||
816 | contents, contents_sz, | ||
817 | store.offset[i]-2, &new_line); | ||
818 | |||
819 | if (copy_end > 0 && contents[copy_end-1] != '\n') | ||
820 | new_line = 1; | ||
821 | |||
822 | /* write the first part of the config */ | ||
823 | if (copy_end > copy_begin) { | ||
824 | if (write_in_full(fd, contents + copy_begin, | ||
825 | copy_end - copy_begin) < | ||
826 | copy_end - copy_begin) | ||
827 | goto write_err_out; | ||
828 | if (new_line && | ||
829 | write_in_full(fd, "\n", 1) != 1) | ||
830 | goto write_err_out; | ||
831 | } | ||
832 | copy_begin = store.offset[i]; | ||
833 | } | ||
834 | |||
835 | /* write the pair (value == NULL means unset) */ | ||
836 | if (value != NULL) { | ||
837 | if (store.state == START) { | ||
838 | if (!store_write_section(fd, key)) | ||
839 | goto write_err_out; | ||
840 | } | ||
841 | if (!store_write_pair(fd, key, value)) | ||
842 | goto write_err_out; | ||
843 | } | ||
844 | |||
845 | /* write the rest of the config */ | ||
846 | if (copy_begin < contents_sz) | ||
847 | if (write_in_full(fd, contents + copy_begin, | ||
848 | contents_sz - copy_begin) < | ||
849 | contents_sz - copy_begin) | ||
850 | goto write_err_out; | ||
851 | |||
852 | munmap(contents, contents_sz); | ||
853 | } | ||
854 | |||
855 | ret = 0; | ||
856 | |||
857 | out_free: | ||
858 | free(config_filename); | ||
859 | return ret; | ||
860 | |||
861 | write_err_out: | ||
862 | goto out_free; | ||
863 | |||
864 | } | ||
865 | |||
/*
 * Call this to report error for your variable that should not
 * get a boolean value (i.e. "[my] var" means "true").
 *
 * Emits "Missing value for '<var>'" through error() and returns error()'s
 * result, so callers can write `return config_error_nonbool(var);`.
 */
int config_error_nonbool(const char *var)
{
	return error("Missing value for '%s'", var);
}
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c new file mode 100644 index 000000000000..b90ec004f29c --- /dev/null +++ b/tools/perf/util/ctype.c | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Sane locale-independent, ASCII ctype. | ||
3 | * | ||
4 | * No surprises, and works with signed and unsigned chars. | ||
5 | */ | ||
6 | #include "cache.h" | ||
7 | |||
/* Short aliases for the character-class flags, used only to keep the table readable. */
enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
};

/*
 * Character-class lookup table indexed by unsigned char value.  Entries
 * for 128..255 are not listed and are therefore zero-initialised, i.e.
 * they belong to no class.
 */
unsigned char sane_ctype[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
	S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G,		/*  48.. 63 */
	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0,		/*  80.. 95 */
	0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0,		/* 112..127 */
	/* Nothing in the 128.. range */
};
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c new file mode 100644 index 000000000000..275b0ee345f5 --- /dev/null +++ b/tools/perf/util/environment.c | |||
@@ -0,0 +1,9 @@ | |||
1 | /* | ||
2 | * We put all the perf config variables in this same object | ||
3 | * file, so that programs can link against the config parser | ||
4 | * without having to link against all the rest of perf. | ||
5 | */ | ||
6 | #include "cache.h" | ||
7 | |||
/* Pager settings; pager_program starts out unset (NULL). */
const char *pager_program = NULL;
/* Non-zero by default. NOTE(review): presumably allows colour through the pager -- confirm against users of this flag. */
int pager_use_color = 1;
diff --git a/tools/perf/util/exec_cmd.c b/tools/perf/util/exec_cmd.c new file mode 100644 index 000000000000..d39292263153 --- /dev/null +++ b/tools/perf/util/exec_cmd.c | |||
@@ -0,0 +1,165 @@ | |||
1 | #include "cache.h" | ||
2 | #include "exec_cmd.h" | ||
3 | #include "quote.h" | ||
/* Size limit for the argument vector built by execl_perf_cmd(). */
#define MAX_ARGS	32

extern char **environ;
/* Set by perf_set_argv_exec_path(); takes precedence in perf_exec_path(). */
static const char *argv_exec_path;
/* Directory part of argv[0], set by perf_extract_argv0_path(). */
static const char *argv0_path;
9 | |||
10 | const char *system_path(const char *path) | ||
11 | { | ||
12 | #ifdef RUNTIME_PREFIX | ||
13 | static const char *prefix; | ||
14 | #else | ||
15 | static const char *prefix = PREFIX; | ||
16 | #endif | ||
17 | struct strbuf d = STRBUF_INIT; | ||
18 | |||
19 | if (is_absolute_path(path)) | ||
20 | return path; | ||
21 | |||
22 | #ifdef RUNTIME_PREFIX | ||
23 | assert(argv0_path); | ||
24 | assert(is_absolute_path(argv0_path)); | ||
25 | |||
26 | if (!prefix && | ||
27 | !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && | ||
28 | !(prefix = strip_path_suffix(argv0_path, BINDIR)) && | ||
29 | !(prefix = strip_path_suffix(argv0_path, "perf"))) { | ||
30 | prefix = PREFIX; | ||
31 | fprintf(stderr, "RUNTIME_PREFIX requested, " | ||
32 | "but prefix computation failed. " | ||
33 | "Using static fallback '%s'.\n", prefix); | ||
34 | } | ||
35 | #endif | ||
36 | |||
37 | strbuf_addf(&d, "%s/%s", prefix, path); | ||
38 | path = strbuf_detach(&d, NULL); | ||
39 | return path; | ||
40 | } | ||
41 | |||
42 | const char *perf_extract_argv0_path(const char *argv0) | ||
43 | { | ||
44 | const char *slash; | ||
45 | |||
46 | if (!argv0 || !*argv0) | ||
47 | return NULL; | ||
48 | slash = argv0 + strlen(argv0); | ||
49 | |||
50 | while (argv0 <= slash && !is_dir_sep(*slash)) | ||
51 | slash--; | ||
52 | |||
53 | if (slash >= argv0) { | ||
54 | argv0_path = strndup(argv0, slash - argv0); | ||
55 | return slash + 1; | ||
56 | } | ||
57 | |||
58 | return argv0; | ||
59 | } | ||
60 | |||
/*
 * Record `exec_path` as the exec path that perf_exec_path() returns from
 * now on.  The pointer is stored, not copied, so the string must stay
 * valid.  The result of setenv() is not checked.
 */
void perf_set_argv_exec_path(const char *exec_path)
{
	argv_exec_path = exec_path;
	/*
	 * Propagate this setting to external programs.
	 */
	setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1);
}
69 | |||
70 | |||
71 | /* Returns the highest-priority, location to look for perf programs. */ | ||
72 | const char *perf_exec_path(void) | ||
73 | { | ||
74 | const char *env; | ||
75 | |||
76 | if (argv_exec_path) | ||
77 | return argv_exec_path; | ||
78 | |||
79 | env = getenv(EXEC_PATH_ENVIRONMENT); | ||
80 | if (env && *env) { | ||
81 | return env; | ||
82 | } | ||
83 | |||
84 | return system_path(PERF_EXEC_PATH); | ||
85 | } | ||
86 | |||
87 | static void add_path(struct strbuf *out, const char *path) | ||
88 | { | ||
89 | if (path && *path) { | ||
90 | if (is_absolute_path(path)) | ||
91 | strbuf_addstr(out, path); | ||
92 | else | ||
93 | strbuf_addstr(out, make_nonrelative_path(path)); | ||
94 | |||
95 | strbuf_addch(out, PATH_SEP); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | void setup_path(void) | ||
100 | { | ||
101 | const char *old_path = getenv("PATH"); | ||
102 | struct strbuf new_path = STRBUF_INIT; | ||
103 | |||
104 | add_path(&new_path, perf_exec_path()); | ||
105 | add_path(&new_path, argv0_path); | ||
106 | |||
107 | if (old_path) | ||
108 | strbuf_addstr(&new_path, old_path); | ||
109 | else | ||
110 | strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); | ||
111 | |||
112 | setenv("PATH", new_path.buf, 1); | ||
113 | |||
114 | strbuf_release(&new_path); | ||
115 | } | ||
116 | |||
/*
 * Build the argument vector { "perf", argv[0], argv[1], ..., NULL } from
 * the NULL-terminated `argv`.
 *
 * Only the vector is allocated; the strings are shared with `argv`.  The
 * caller frees the result with free().  Returns NULL when the allocation
 * fails (previously the NULL pointer was dereferenced).
 */
const char **prepare_perf_cmd(const char **argv)
{
	int argc = 0;
	int i;
	const char **nargv;

	while (argv[argc])
		argc++;

	/* Room for "perf", the argc arguments, and the terminating NULL. */
	nargv = malloc(sizeof(*nargv) * (argc + 2));
	if (!nargv)
		return NULL;

	nargv[0] = "perf";
	for (i = 0; i < argc; i++)
		nargv[i + 1] = argv[i];
	nargv[argc + 1] = NULL;
	return nargv;
}
132 | |||
/*
 * Replace the current process with "perf <argv...>", looked up via $PATH.
 *
 * `argv` is NULL-terminated.  Only returns on failure, in which case the
 * result is -1.  A NULL vector from prepare_perf_cmd() is treated as a
 * failure rather than being handed to execvp().
 */
int execv_perf_cmd(const char **argv) {
	const char **nargv = prepare_perf_cmd(argv);

	if (!nargv)
		return -1;

	/* execvp() can only ever return if it fails */
	execvp("perf", (char **)nargv);

	free(nargv);
	return -1;
}
142 | |||
143 | |||
144 | int execl_perf_cmd(const char *cmd,...) | ||
145 | { | ||
146 | int argc; | ||
147 | const char *argv[MAX_ARGS + 1]; | ||
148 | const char *arg; | ||
149 | va_list param; | ||
150 | |||
151 | va_start(param, cmd); | ||
152 | argv[0] = cmd; | ||
153 | argc = 1; | ||
154 | while (argc < MAX_ARGS) { | ||
155 | arg = argv[argc++] = va_arg(param, char *); | ||
156 | if (!arg) | ||
157 | break; | ||
158 | } | ||
159 | va_end(param); | ||
160 | if (MAX_ARGS <= argc) | ||
161 | return error("too many args to run %s", cmd); | ||
162 | |||
163 | argv[argc] = NULL; | ||
164 | return execv_perf_cmd(argv); | ||
165 | } | ||
diff --git a/tools/perf/util/exec_cmd.h b/tools/perf/util/exec_cmd.h new file mode 100644 index 000000000000..effe25eb1545 --- /dev/null +++ b/tools/perf/util/exec_cmd.h | |||
@@ -0,0 +1,13 @@ | |||
#ifndef PERF_EXEC_CMD_H
#define PERF_EXEC_CMD_H

/* Helpers for locating and exec'ing perf programs (see exec_cmd.c). */

/* Record exec_path as the preferred exec path and export it to the environment. */
extern void perf_set_argv_exec_path(const char *exec_path);
/* Remember the directory part of argv[0]; returns the program-name part. */
extern const char *perf_extract_argv0_path(const char *path);
/* Highest-priority location to look for perf programs. */
extern const char *perf_exec_path(void);
/* Prepend the exec path and argv[0]'s directory to $PATH. */
extern void setup_path(void);
/* Build a malloc'ed { "perf", argv..., NULL } vector. */
extern const char **prepare_perf_cmd(const char **argv);
extern int execv_perf_cmd(const char **argv); /* NULL terminated */
/* Variadic variant; the argument list must end with NULL. */
extern int execl_perf_cmd(const char *cmd, ...);
/* Make path absolute relative to the installation prefix. */
extern const char *system_path(const char *path);

#endif /* PERF_EXEC_CMD_H */
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh new file mode 100755 index 000000000000..f06f6fd148f8 --- /dev/null +++ b/tools/perf/util/generate-cmdlist.sh | |||
@@ -0,0 +1,24 @@ | |||
#!/bin/sh
#
# Print, on stdout, a C fragment defining `struct cmdname_help` and the
# `common_cmds[]` table.  Reads command-list.txt and
# Documentation/perf-<cmd>.txt relative to the current directory.

echo "/* Automatically generated by $0 */
struct cmdname_help
{
char name[16];
char help[80];
};

static struct cmdname_help common_cmds[] = {"

# Select the names of the commands marked "common" in command-list.txt,
# sorted.
sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt |
sort |
while read cmd
do
# Collect the lines from NAME up to the "perf-<cmd>" line of the command's
# documentation, then turn the "perf-<cmd> - <summary>" text into one
# {"<cmd>", "<summary>"}, table entry.
sed -n '
/^NAME/,/perf-'"$cmd"'/H
${
x
s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
p
}' "Documentation/perf-$cmd.txt"
done
echo "};"
diff --git a/tools/perf/util/help.c b/tools/perf/util/help.c new file mode 100644 index 000000000000..6653f7dd1d78 --- /dev/null +++ b/tools/perf/util/help.c | |||
@@ -0,0 +1,367 @@ | |||
1 | #include "cache.h" | ||
2 | #include "../builtin.h" | ||
3 | #include "exec_cmd.h" | ||
4 | #include "levenshtein.h" | ||
5 | #include "help.h" | ||
6 | |||
/*
 * Work out how many columns the terminal has.
 *
 * A positive integer in the COLUMNS environment variable takes precedence
 * (most GUI terminals set it, although some don't export it). Failing that,
 * and where TIOCGWINSZ is available, the window size of file descriptor 1 is
 * queried. When neither yields a non-zero width, 80 is returned.
 */
static int term_columns(void)
{
	const char *env = getenv("COLUMNS");

	if (env) {
		int from_env = atoi(env);

		if (from_env > 0)
			return from_env;
	}

#ifdef TIOCGWINSZ
	{
		struct winsize win;

		if (ioctl(1, TIOCGWINSZ, &win) == 0 && win.ws_col)
			return win.ws_col;
	}
#endif

	return 80;
}
28 | |||
29 | void add_cmdname(struct cmdnames *cmds, const char *name, int len) | ||
30 | { | ||
31 | struct cmdname *ent = malloc(sizeof(*ent) + len + 1); | ||
32 | |||
33 | ent->len = len; | ||
34 | memcpy(ent->name, name, len); | ||
35 | ent->name[len] = 0; | ||
36 | |||
37 | ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); | ||
38 | cmds->names[cmds->cnt++] = ent; | ||
39 | } | ||
40 | |||
41 | static void clean_cmdnames(struct cmdnames *cmds) | ||
42 | { | ||
43 | int i; | ||
44 | for (i = 0; i < cmds->cnt; ++i) | ||
45 | free(cmds->names[i]); | ||
46 | free(cmds->names); | ||
47 | cmds->cnt = 0; | ||
48 | cmds->alloc = 0; | ||
49 | } | ||
50 | |||
51 | static int cmdname_compare(const void *a_, const void *b_) | ||
52 | { | ||
53 | struct cmdname *a = *(struct cmdname **)a_; | ||
54 | struct cmdname *b = *(struct cmdname **)b_; | ||
55 | return strcmp(a->name, b->name); | ||
56 | } | ||
57 | |||
58 | static void uniq(struct cmdnames *cmds) | ||
59 | { | ||
60 | int i, j; | ||
61 | |||
62 | if (!cmds->cnt) | ||
63 | return; | ||
64 | |||
65 | for (i = j = 1; i < cmds->cnt; i++) | ||
66 | if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) | ||
67 | cmds->names[j++] = cmds->names[i]; | ||
68 | |||
69 | cmds->cnt = j; | ||
70 | } | ||
71 | |||
72 | void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) | ||
73 | { | ||
74 | int ci, cj, ei; | ||
75 | int cmp; | ||
76 | |||
77 | ci = cj = ei = 0; | ||
78 | while (ci < cmds->cnt && ei < excludes->cnt) { | ||
79 | cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); | ||
80 | if (cmp < 0) | ||
81 | cmds->names[cj++] = cmds->names[ci++]; | ||
82 | else if (cmp == 0) | ||
83 | ci++, ei++; | ||
84 | else if (cmp > 0) | ||
85 | ei++; | ||
86 | } | ||
87 | |||
88 | while (ci < cmds->cnt) | ||
89 | cmds->names[cj++] = cmds->names[ci++]; | ||
90 | |||
91 | cmds->cnt = cj; | ||
92 | } | ||
93 | |||
94 | static void pretty_print_string_list(struct cmdnames *cmds, int longest) | ||
95 | { | ||
96 | int cols = 1, rows; | ||
97 | int space = longest + 1; /* min 1 SP between words */ | ||
98 | int max_cols = term_columns() - 1; /* don't print *on* the edge */ | ||
99 | int i, j; | ||
100 | |||
101 | if (space < max_cols) | ||
102 | cols = max_cols / space; | ||
103 | rows = (cmds->cnt + cols - 1) / cols; | ||
104 | |||
105 | for (i = 0; i < rows; i++) { | ||
106 | printf(" "); | ||
107 | |||
108 | for (j = 0; j < cols; j++) { | ||
109 | int n = j * rows + i; | ||
110 | int size = space; | ||
111 | if (n >= cmds->cnt) | ||
112 | break; | ||
113 | if (j == cols-1 || n + rows >= cmds->cnt) | ||
114 | size = 1; | ||
115 | printf("%-*s", size, cmds->names[n]->name); | ||
116 | } | ||
117 | putchar('\n'); | ||
118 | } | ||
119 | } | ||
120 | |||
/*
 * Return non-zero when @name is a regular file with the owner-execute bit set,
 * zero otherwise (including when stat() fails).
 *
 * On MinGW the mode bit is not trusted; the file counts as executable when its
 * first two bytes are "#!" or "MZ".
 */
static int is_executable(const char *name)
{
	struct stat st;

	/* stat, not lstat */
	if (stat(name, &st) != 0)
		return 0;
	if (!S_ISREG(st.st_mode))
		return 0;

#ifdef __MINGW32__
	/* cannot trust the executable bit, peek into the file instead */
	char magic[3] = { 0 };
	int fd = open(name, O_RDONLY);

	st.st_mode &= ~S_IXUSR;
	if (fd >= 0) {
		/* DOS executables start with "MZ" */
		if (read(fd, magic, 2) == 2 &&
		    (!strcmp(magic, "#!") || !strcmp(magic, "MZ")))
			st.st_mode |= S_IXUSR;
		close(fd);
	}
#endif
	return st.st_mode & S_IXUSR;
}
146 | |||
147 | static void list_commands_in_dir(struct cmdnames *cmds, | ||
148 | const char *path, | ||
149 | const char *prefix) | ||
150 | { | ||
151 | int prefix_len; | ||
152 | DIR *dir = opendir(path); | ||
153 | struct dirent *de; | ||
154 | struct strbuf buf = STRBUF_INIT; | ||
155 | int len; | ||
156 | |||
157 | if (!dir) | ||
158 | return; | ||
159 | if (!prefix) | ||
160 | prefix = "perf-"; | ||
161 | prefix_len = strlen(prefix); | ||
162 | |||
163 | strbuf_addf(&buf, "%s/", path); | ||
164 | len = buf.len; | ||
165 | |||
166 | while ((de = readdir(dir)) != NULL) { | ||
167 | int entlen; | ||
168 | |||
169 | if (prefixcmp(de->d_name, prefix)) | ||
170 | continue; | ||
171 | |||
172 | strbuf_setlen(&buf, len); | ||
173 | strbuf_addstr(&buf, de->d_name); | ||
174 | if (!is_executable(buf.buf)) | ||
175 | continue; | ||
176 | |||
177 | entlen = strlen(de->d_name) - prefix_len; | ||
178 | if (has_extension(de->d_name, ".exe")) | ||
179 | entlen -= 4; | ||
180 | |||
181 | add_cmdname(cmds, de->d_name + prefix_len, entlen); | ||
182 | } | ||
183 | closedir(dir); | ||
184 | strbuf_release(&buf); | ||
185 | } | ||
186 | |||
187 | void load_command_list(const char *prefix, | ||
188 | struct cmdnames *main_cmds, | ||
189 | struct cmdnames *other_cmds) | ||
190 | { | ||
191 | const char *env_path = getenv("PATH"); | ||
192 | const char *exec_path = perf_exec_path(); | ||
193 | |||
194 | if (exec_path) { | ||
195 | list_commands_in_dir(main_cmds, exec_path, prefix); | ||
196 | qsort(main_cmds->names, main_cmds->cnt, | ||
197 | sizeof(*main_cmds->names), cmdname_compare); | ||
198 | uniq(main_cmds); | ||
199 | } | ||
200 | |||
201 | if (env_path) { | ||
202 | char *paths, *path, *colon; | ||
203 | path = paths = strdup(env_path); | ||
204 | while (1) { | ||
205 | if ((colon = strchr(path, PATH_SEP))) | ||
206 | *colon = 0; | ||
207 | if (!exec_path || strcmp(path, exec_path)) | ||
208 | list_commands_in_dir(other_cmds, path, prefix); | ||
209 | |||
210 | if (!colon) | ||
211 | break; | ||
212 | path = colon + 1; | ||
213 | } | ||
214 | free(paths); | ||
215 | |||
216 | qsort(other_cmds->names, other_cmds->cnt, | ||
217 | sizeof(*other_cmds->names), cmdname_compare); | ||
218 | uniq(other_cmds); | ||
219 | } | ||
220 | exclude_cmds(other_cmds, main_cmds); | ||
221 | } | ||
222 | |||
223 | void list_commands(const char *title, struct cmdnames *main_cmds, | ||
224 | struct cmdnames *other_cmds) | ||
225 | { | ||
226 | int i, longest = 0; | ||
227 | |||
228 | for (i = 0; i < main_cmds->cnt; i++) | ||
229 | if (longest < main_cmds->names[i]->len) | ||
230 | longest = main_cmds->names[i]->len; | ||
231 | for (i = 0; i < other_cmds->cnt; i++) | ||
232 | if (longest < other_cmds->names[i]->len) | ||
233 | longest = other_cmds->names[i]->len; | ||
234 | |||
235 | if (main_cmds->cnt) { | ||
236 | const char *exec_path = perf_exec_path(); | ||
237 | printf("available %s in '%s'\n", title, exec_path); | ||
238 | printf("----------------"); | ||
239 | mput_char('-', strlen(title) + strlen(exec_path)); | ||
240 | putchar('\n'); | ||
241 | pretty_print_string_list(main_cmds, longest); | ||
242 | putchar('\n'); | ||
243 | } | ||
244 | |||
245 | if (other_cmds->cnt) { | ||
246 | printf("%s available from elsewhere on your $PATH\n", title); | ||
247 | printf("---------------------------------------"); | ||
248 | mput_char('-', strlen(title)); | ||
249 | putchar('\n'); | ||
250 | pretty_print_string_list(other_cmds, longest); | ||
251 | putchar('\n'); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | int is_in_cmdlist(struct cmdnames *c, const char *s) | ||
256 | { | ||
257 | int i; | ||
258 | for (i = 0; i < c->cnt; i++) | ||
259 | if (!strcmp(s, c->names[i]->name)) | ||
260 | return 1; | ||
261 | return 0; | ||
262 | } | ||
263 | |||
264 | static int autocorrect; | ||
265 | static struct cmdnames aliases; | ||
266 | |||
267 | static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) | ||
268 | { | ||
269 | if (!strcmp(var, "help.autocorrect")) | ||
270 | autocorrect = perf_config_int(var,value); | ||
271 | /* Also use aliases for command lookup */ | ||
272 | if (!prefixcmp(var, "alias.")) | ||
273 | add_cmdname(&aliases, var + 6, strlen(var + 6)); | ||
274 | |||
275 | return perf_default_config(var, value, cb); | ||
276 | } | ||
277 | |||
278 | static int levenshtein_compare(const void *p1, const void *p2) | ||
279 | { | ||
280 | const struct cmdname *const *c1 = p1, *const *c2 = p2; | ||
281 | const char *s1 = (*c1)->name, *s2 = (*c2)->name; | ||
282 | int l1 = (*c1)->len; | ||
283 | int l2 = (*c2)->len; | ||
284 | return l1 != l2 ? l1 - l2 : strcmp(s1, s2); | ||
285 | } | ||
286 | |||
287 | static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) | ||
288 | { | ||
289 | int i; | ||
290 | ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); | ||
291 | |||
292 | for (i = 0; i < old->cnt; i++) | ||
293 | cmds->names[cmds->cnt++] = old->names[i]; | ||
294 | free(old->names); | ||
295 | old->cnt = 0; | ||
296 | old->names = NULL; | ||
297 | } | ||
298 | |||
299 | const char *help_unknown_cmd(const char *cmd) | ||
300 | { | ||
301 | int i, n = 0, best_similarity = 0; | ||
302 | struct cmdnames main_cmds, other_cmds; | ||
303 | |||
304 | memset(&main_cmds, 0, sizeof(main_cmds)); | ||
305 | memset(&other_cmds, 0, sizeof(main_cmds)); | ||
306 | memset(&aliases, 0, sizeof(aliases)); | ||
307 | |||
308 | perf_config(perf_unknown_cmd_config, NULL); | ||
309 | |||
310 | load_command_list("perf-", &main_cmds, &other_cmds); | ||
311 | |||
312 | add_cmd_list(&main_cmds, &aliases); | ||
313 | add_cmd_list(&main_cmds, &other_cmds); | ||
314 | qsort(main_cmds.names, main_cmds.cnt, | ||
315 | sizeof(main_cmds.names), cmdname_compare); | ||
316 | uniq(&main_cmds); | ||
317 | |||
318 | if (main_cmds.cnt) { | ||
319 | /* This reuses cmdname->len for similarity index */ | ||
320 | for (i = 0; i < main_cmds.cnt; ++i) | ||
321 | main_cmds.names[i]->len = | ||
322 | levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); | ||
323 | |||
324 | qsort(main_cmds.names, main_cmds.cnt, | ||
325 | sizeof(*main_cmds.names), levenshtein_compare); | ||
326 | |||
327 | best_similarity = main_cmds.names[0]->len; | ||
328 | n = 1; | ||
329 | while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) | ||
330 | ++n; | ||
331 | } | ||
332 | |||
333 | if (autocorrect && n == 1) { | ||
334 | const char *assumed = main_cmds.names[0]->name; | ||
335 | |||
336 | main_cmds.names[0] = NULL; | ||
337 | clean_cmdnames(&main_cmds); | ||
338 | fprintf(stderr, "WARNING: You called a Git program named '%s', " | ||
339 | "which does not exist.\n" | ||
340 | "Continuing under the assumption that you meant '%s'\n", | ||
341 | cmd, assumed); | ||
342 | if (autocorrect > 0) { | ||
343 | fprintf(stderr, "in %0.1f seconds automatically...\n", | ||
344 | (float)autocorrect/10.0); | ||
345 | poll(NULL, 0, autocorrect * 100); | ||
346 | } | ||
347 | return assumed; | ||
348 | } | ||
349 | |||
350 | fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); | ||
351 | |||
352 | if (main_cmds.cnt && best_similarity < 6) { | ||
353 | fprintf(stderr, "\nDid you mean %s?\n", | ||
354 | n < 2 ? "this": "one of these"); | ||
355 | |||
356 | for (i = 0; i < n; i++) | ||
357 | fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); | ||
358 | } | ||
359 | |||
360 | exit(1); | ||
361 | } | ||
362 | |||
363 | int cmd_version(int argc, const char **argv, const char *prefix) | ||
364 | { | ||
365 | printf("perf version %s\n", perf_version_string); | ||
366 | return 0; | ||
367 | } | ||
diff --git a/tools/perf/util/help.h b/tools/perf/util/help.h new file mode 100644 index 000000000000..56bc15406ffc --- /dev/null +++ b/tools/perf/util/help.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef HELP_H | ||
2 | #define HELP_H | ||
3 | |||
4 | struct cmdnames { | ||
5 | int alloc; | ||
6 | int cnt; | ||
7 | struct cmdname { | ||
8 | size_t len; /* also used for similarity index in help.c */ | ||
9 | char name[FLEX_ARRAY]; | ||
10 | } **names; | ||
11 | }; | ||
12 | |||
/* Write character @c to stdout @num times. */
static inline void mput_char(char c, unsigned int num)
{
	for (; num; num--)
		putchar(c);
}
18 | |||
19 | void load_command_list(const char *prefix, | ||
20 | struct cmdnames *main_cmds, | ||
21 | struct cmdnames *other_cmds); | ||
22 | void add_cmdname(struct cmdnames *cmds, const char *name, int len); | ||
23 | /* Here we require that excludes is a sorted list. */ | ||
24 | void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); | ||
25 | int is_in_cmdlist(struct cmdnames *c, const char *s); | ||
26 | void list_commands(const char *title, struct cmdnames *main_cmds, | ||
27 | struct cmdnames *other_cmds); | ||
28 | |||
29 | #endif /* HELP_H */ | ||
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c new file mode 100644 index 000000000000..e521d1516df6 --- /dev/null +++ b/tools/perf/util/levenshtein.c | |||
@@ -0,0 +1,84 @@ | |||
1 | #include "cache.h" | ||
2 | #include "levenshtein.h" | ||
3 | |||
4 | /* | ||
5 | * This function implements the Damerau-Levenshtein algorithm to | ||
6 | * calculate a distance between strings. | ||
7 | * | ||
8 | * Basically, it says how many letters need to be swapped, substituted, | ||
9 | * deleted from, or added to string1, at least, to get string2. | ||
10 | * | ||
11 | * The idea is to build a distance matrix for the substrings of both | ||
12 | * strings. To avoid a large space complexity, only the last three rows | ||
13 | * are kept in memory (if swaps had the same or higher cost as one deletion | ||
14 | * plus one insertion, only two rows would be needed). | ||
15 | * | ||
16 | * At any stage, "i + 1" denotes the length of the current substring of | ||
17 | * string1 that the distance is calculated for. | ||
18 | * | ||
19 | * row2 holds the current row, row1 the previous row (i.e. for the substring | ||
20 | * of string1 of length "i"), and row0 the row before that. | ||
21 | * | ||
22 | * In other words, at the start of the big loop, row2[j + 1] contains the | ||
23 | * Damerau-Levenshtein distance between the substring of string1 of length | ||
24 | * "i" and the substring of string2 of length "j + 1". | ||
25 | * | ||
26 | * All the big loop does is determine the partial minimum-cost paths. | ||
27 | * | ||
28 | * It does so by calculating the costs of the path ending in characters | ||
29 | * i (in string1) and j (in string2), respectively, given that the last | ||
30 | * operation is a substitution, a swap, a deletion, or an insertion. | ||
31 | * | ||
32 | * This implementation allows the costs to be weighted: | ||
33 | * | ||
34 | * - w (as in "sWap") | ||
35 | * - s (as in "Substitution") | ||
36 | * - a (for insertion, AKA "Add") | ||
37 | * - d (as in "Deletion") | ||
38 | * | ||
39 | * Note that this algorithm calculates a distance _iff_ d == a. | ||
40 | */ | ||
int levenshtein(const char *string1, const char *string2,
		int w, int s, int a, int d)
{
	int len1 = strlen(string1), len2 = strlen(string2);
	size_t width = (size_t)len2 + 1;
	/* one checked allocation holds all three rows of the distance matrix */
	int *rows = malloc(3 * width * sizeof(*rows));
	int *row0, *row1, *row2;
	int i, j;

	/*
	 * Out of memory: fall back to the cost of deleting all of string1 and
	 * inserting all of string2, an upper bound of the real distance for
	 * non-negative weights, rather than writing through a NULL pointer.
	 */
	if (!rows)
		return len1 * d + len2 * a;

	row0 = rows;
	row1 = rows + width;
	row2 = rows + 2 * width;

	/* distance from the empty prefix of string1: j insertions */
	for (j = 0; j <= len2; j++)
		row1[j] = j * a;

	for (i = 0; i < len1; i++) {
		int *dummy;

		/* distance to the empty prefix of string2: i + 1 deletions */
		row2[0] = (i + 1) * d;
		for (j = 0; j < len2; j++) {
			/* substitution */
			row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
			/* swap */
			if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
					string1[i] == string2[j - 1] &&
					row2[j + 1] > row0[j - 1] + w)
				row2[j + 1] = row0[j - 1] + w;
			/* deletion */
			if (row2[j + 1] > row1[j + 1] + d)
				row2[j + 1] = row1[j + 1] + d;
			/* insertion */
			if (row2[j + 1] > row2[j] + a)
				row2[j + 1] = row2[j] + a;
		}

		/* rotate: the current row becomes the previous one */
		dummy = row0;
		row0 = row1;
		row1 = row2;
		row2 = dummy;
	}

	i = row1[len2];
	free(rows);

	return i;
}
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h new file mode 100644 index 000000000000..0173abeef52c --- /dev/null +++ b/tools/perf/util/levenshtein.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef LEVENSHTEIN_H | ||
2 | #define LEVENSHTEIN_H | ||
3 | |||
4 | int levenshtein(const char *string1, const char *string2, | ||
5 | int swap_penalty, int substition_penalty, | ||
6 | int insertion_penalty, int deletion_penalty); | ||
7 | |||
8 | #endif | ||
diff --git a/tools/perf/util/list.h b/tools/perf/util/list.h new file mode 100644 index 000000000000..e2548e8072cf --- /dev/null +++ b/tools/perf/util/list.h | |||
@@ -0,0 +1,603 @@ | |||
1 | #ifndef _LINUX_LIST_H | ||
2 | #define _LINUX_LIST_H | ||
3 | /* | ||
4 | Copyright (C) Cast of dozens, comes from the Linux kernel | ||
5 | |||
6 | This program is free software; you can redistribute it and/or modify it | ||
7 | under the terms of version 2 of the GNU General Public License as | ||
8 | published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <stddef.h> | ||
12 | |||
13 | /* | ||
14 | * These are non-NULL pointers that will result in page faults | ||
15 | * under normal circumstances, used to verify that nobody uses | ||
16 | * non-initialized list entries. | ||
17 | */ | ||
18 | #define LIST_POISON1 ((void *)0x00100100) | ||
19 | #define LIST_POISON2 ((void *)0x00200200) | ||
20 | |||
21 | /** | ||
22 | * container_of - cast a member of a structure out to the containing structure | ||
23 | * @ptr: the pointer to the member. | ||
24 | * @type: the type of the container struct this is embedded in. | ||
25 | * @member: the name of the member within the struct. | ||
26 | * | ||
27 | */ | ||
#define container_of(ptr, type, member) ({				\
	const typeof(((type *)0)->member) *__member_ptr = (ptr);	\
	(type *)((char *)__member_ptr - offsetof(type, member)); })
31 | |||
32 | /* | ||
33 | * Simple doubly linked list implementation. | ||
34 | * | ||
35 | * Some of the internal functions ("__xxx") are useful when | ||
36 | * manipulating whole lists rather than single entries, as | ||
37 | * sometimes we already know the next/prev entries and we can | ||
38 | * generate better code by using them directly rather than | ||
39 | * using the generic single-entry routines. | ||
40 | */ | ||
41 | |||
/* Link node embedded in every list member; the list head uses the same type. */
struct list_head {
	struct list_head *next;
	struct list_head *prev;
};
45 | |||
/* Initializer for an empty list: both links point back at the head itself. */
#define LIST_HEAD_INIT(name) { &(name), &(name) }

/* Define a list head variable called @name, initialized as an empty list. */
#define LIST_HEAD(name) struct list_head name = LIST_HEAD_INIT(name)
50 | |||
51 | static inline void INIT_LIST_HEAD(struct list_head *list) | ||
52 | { | ||
53 | list->next = list; | ||
54 | list->prev = list; | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * Insert a new entry between two known consecutive entries. | ||
59 | * | ||
60 | * This is only for internal list manipulation where we know | ||
61 | * the prev/next entries already! | ||
62 | */ | ||
63 | static inline void __list_add(struct list_head *new, | ||
64 | struct list_head *prev, | ||
65 | struct list_head *next) | ||
66 | { | ||
67 | next->prev = new; | ||
68 | new->next = next; | ||
69 | new->prev = prev; | ||
70 | prev->next = new; | ||
71 | } | ||
72 | |||
73 | /** | ||
74 | * list_add - add a new entry | ||
75 | * @new: new entry to be added | ||
76 | * @head: list head to add it after | ||
77 | * | ||
78 | * Insert a new entry after the specified head. | ||
79 | * This is good for implementing stacks. | ||
80 | */ | ||
81 | static inline void list_add(struct list_head *new, struct list_head *head) | ||
82 | { | ||
83 | __list_add(new, head, head->next); | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * list_add_tail - add a new entry | ||
88 | * @new: new entry to be added | ||
89 | * @head: list head to add it before | ||
90 | * | ||
91 | * Insert a new entry before the specified head. | ||
92 | * This is useful for implementing queues. | ||
93 | */ | ||
94 | static inline void list_add_tail(struct list_head *new, struct list_head *head) | ||
95 | { | ||
96 | __list_add(new, head->prev, head); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Delete a list entry by making the prev/next entries | ||
101 | * point to each other. | ||
102 | * | ||
103 | * This is only for internal list manipulation where we know | ||
104 | * the prev/next entries already! | ||
105 | */ | ||
106 | static inline void __list_del(struct list_head * prev, struct list_head * next) | ||
107 | { | ||
108 | next->prev = prev; | ||
109 | prev->next = next; | ||
110 | } | ||
111 | |||
112 | /** | ||
113 | * list_del - deletes entry from list. | ||
114 | * @entry: the element to delete from the list. | ||
115 | * Note: list_empty on entry does not return true after this, the entry is | ||
116 | * in an undefined state. | ||
117 | */ | ||
118 | static inline void list_del(struct list_head *entry) | ||
119 | { | ||
120 | __list_del(entry->prev, entry->next); | ||
121 | entry->next = LIST_POISON1; | ||
122 | entry->prev = LIST_POISON2; | ||
123 | } | ||
124 | |||
125 | /** | ||
126 | * list_del_range - deletes range of entries from list. | ||
127 | * @begin: first element in the range to delete from the list. | ||
128 | * @end: last element in the range to delete from the list. | ||
129 | * Note: list_empty on the range of entries does not return true after this, | ||
130 | * the entries are in an undefined state. | ||
131 | */ | ||
132 | static inline void list_del_range(struct list_head *begin, | ||
133 | struct list_head *end) | ||
134 | { | ||
135 | begin->prev->next = end->next; | ||
136 | end->next->prev = begin->prev; | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * list_replace - replace old entry by new one | ||
141 | * @old : the element to be replaced | ||
142 | * @new : the new element to insert | ||
143 | * Note: if 'old' was empty, it will be overwritten. | ||
144 | */ | ||
145 | static inline void list_replace(struct list_head *old, | ||
146 | struct list_head *new) | ||
147 | { | ||
/*
 * Order matters here: each neighbour is reached through the link that was
 * just stored in @new (new->next, new->prev), and old->prev is only read
 * after the write through new->next. Do not reorder these statements or
 * cache old's links up front.
 */
148 | new->next = old->next; | ||
149 | new->next->prev = new; | ||
150 | new->prev = old->prev; | ||
151 | new->prev->next = new; | ||
152 | } | ||
153 | |||
154 | static inline void list_replace_init(struct list_head *old, | ||
155 | struct list_head *new) | ||
156 | { | ||
157 | list_replace(old, new); | ||
158 | INIT_LIST_HEAD(old); | ||
159 | } | ||
160 | |||
161 | /** | ||
162 | * list_del_init - deletes entry from list and reinitialize it. | ||
163 | * @entry: the element to delete from the list. | ||
164 | */ | ||
165 | static inline void list_del_init(struct list_head *entry) | ||
166 | { | ||
167 | __list_del(entry->prev, entry->next); | ||
168 | INIT_LIST_HEAD(entry); | ||
169 | } | ||
170 | |||
171 | /** | ||
172 | * list_move - delete from one list and add as another's head | ||
173 | * @list: the entry to move | ||
174 | * @head: the head that will precede our entry | ||
175 | */ | ||
176 | static inline void list_move(struct list_head *list, struct list_head *head) | ||
177 | { | ||
178 | __list_del(list->prev, list->next); | ||
179 | list_add(list, head); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * list_move_tail - delete from one list and add as another's tail | ||
184 | * @list: the entry to move | ||
185 | * @head: the head that will follow our entry | ||
186 | */ | ||
187 | static inline void list_move_tail(struct list_head *list, | ||
188 | struct list_head *head) | ||
189 | { | ||
190 | __list_del(list->prev, list->next); | ||
191 | list_add_tail(list, head); | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * list_is_last - tests whether @list is the last entry in list @head | ||
196 | * @list: the entry to test | ||
197 | * @head: the head of the list | ||
198 | */ | ||
199 | static inline int list_is_last(const struct list_head *list, | ||
200 | const struct list_head *head) | ||
201 | { | ||
202 | return list->next == head; | ||
203 | } | ||
204 | |||
205 | /** | ||
206 | * list_empty - tests whether a list is empty | ||
207 | * @head: the list to test. | ||
208 | */ | ||
209 | static inline int list_empty(const struct list_head *head) | ||
210 | { | ||
211 | return head->next == head; | ||
212 | } | ||
213 | |||
214 | /** | ||
215 | * list_empty_careful - tests whether a list is empty and not being modified | ||
216 | * @head: the list to test | ||
217 | * | ||
218 | * Description: | ||
219 | * tests whether a list is empty _and_ checks that no other CPU might be | ||
220 | * in the process of modifying either member (next or prev) | ||
221 | * | ||
222 | * NOTE: using list_empty_careful() without synchronization | ||
223 | * can only be safe if the only activity that can happen | ||
224 | * to the list entry is list_del_init(). Eg. it cannot be used | ||
225 | * if another CPU could re-list_add() it. | ||
226 | */ | ||
227 | static inline int list_empty_careful(const struct list_head *head) | ||
228 | { | ||
229 | struct list_head *next = head->next; | ||
230 | return (next == head) && (next == head->prev); | ||
231 | } | ||
232 | |||
233 | static inline void __list_splice(struct list_head *list, | ||
234 | struct list_head *head) | ||
235 | { | ||
236 | struct list_head *first = list->next; | ||
237 | struct list_head *last = list->prev; | ||
238 | struct list_head *at = head->next; | ||
239 | |||
240 | first->prev = head; | ||
241 | head->next = first; | ||
242 | |||
243 | last->next = at; | ||
244 | at->prev = last; | ||
245 | } | ||
246 | |||
247 | /** | ||
248 | * list_splice - join two lists | ||
249 | * @list: the new list to add. | ||
250 | * @head: the place to add it in the first list. | ||
251 | */ | ||
static inline void list_splice(struct list_head *list, struct list_head *head)
{
	/* nothing to move from an empty list */
	if (list_empty(list))
		return;

	__list_splice(list, head);
}
257 | |||
258 | /** | ||
259 | * list_splice_init - join two lists and reinitialise the emptied list. | ||
260 | * @list: the new list to add. | ||
261 | * @head: the place to add it in the first list. | ||
262 | * | ||
263 | * The list at @list is reinitialised | ||
264 | */ | ||
static inline void list_splice_init(struct list_head *list,
				    struct list_head *head)
{
	/* an empty list is left exactly as it is */
	if (list_empty(list))
		return;

	__list_splice(list, head);
	INIT_LIST_HEAD(list);
}
273 | |||
274 | /** | ||
275 | * list_entry - get the struct for this entry | ||
276 | * @ptr: the &struct list_head pointer. | ||
277 | * @type: the type of the struct this is embedded in. | ||
278 | * @member: the name of the list_struct within the struct. | ||
279 | */ | ||
#define list_entry(ptr, type, member) container_of(ptr, type, member)
282 | |||
283 | /** | ||
284 | * list_first_entry - get the first element from a list | ||
285 | * @ptr: the list head to take the element from. | ||
286 | * @type: the type of the struct this is embedded in. | ||
287 | * @member: the name of the list_struct within the struct. | ||
288 | * | ||
289 | * Note, that list is expected to be not empty. | ||
290 | */ | ||
#define list_first_entry(ptr, type, member) list_entry((ptr)->next, type, member)
293 | |||
294 | /** | ||
295 | * list_for_each - iterate over a list | ||
296 | * @pos: the &struct list_head to use as a loop cursor. | ||
297 | * @head: the head for your list. | ||
298 | */ | ||
#define list_for_each(pos, head) \
	for (pos = (head)->next; pos != (head); pos = pos->next)
302 | |||
303 | /** | ||
304 | * __list_for_each - iterate over a list | ||
305 | * @pos: the &struct list_head to use as a loop cursor. | ||
306 | * @head: the head for your list. | ||
307 | * | ||
308 | * This variant differs from list_for_each() in that it's the | ||
309 | * simplest possible list iteration code, no prefetching is done. | ||
310 | * Use this for code that knows the list to be very short (empty | ||
311 | * or 1 entry) most of the time. Note that in this copy list_for_each() does no prefetching either, so both macros expand to the same loop. | ||
312 | */ | ||
#define __list_for_each(pos, head)		\
	for (pos = (head)->next;		\
	     pos != (head);			\
	     pos = pos->next)
315 | |||
316 | /** | ||
317 | * list_for_each_prev - iterate over a list backwards | ||
318 | * @pos: the &struct list_head to use as a loop cursor. | ||
319 | * @head: the head for your list. | ||
320 | */ | ||
#define list_for_each_prev(pos, head) \
	for (pos = (head)->prev; pos != (head); pos = pos->prev)
324 | |||
325 | /** | ||
326 | * list_for_each_safe - iterate over a list safe against removal of list entry | ||
327 | * @pos: the &struct list_head to use as a loop cursor. | ||
328 | * @n: another &struct list_head to use as temporary storage | ||
329 | * @head: the head for your list. | ||
330 | */ | ||
#define list_for_each_safe(pos, n, head)			\
	for (pos = (head)->next, n = pos->next;			\
	     pos != (head);					\
	     pos = n, n = pos->next)
334 | |||
335 | /** | ||
336 | * list_for_each_entry - iterate over list of given type | ||
337 | * @pos: the type * to use as a loop cursor. | ||
338 | * @head: the head for your list. | ||
339 | * @member: the name of the list_struct within the struct. | ||
340 | */ | ||
#define list_for_each_entry(pos, head, member)				\
	for (pos = list_entry((head)->next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, typeof(*pos), member))
345 | |||
346 | /** | ||
347 | * list_for_each_entry_reverse - iterate backwards over list of given type. | ||
348 | * @pos: the type * to use as a loop cursor. | ||
349 | * @head: the head for your list. | ||
350 | * @member: the name of the list_struct within the struct. | ||
351 | */ | ||
#define list_for_each_entry_reverse(pos, head, member)			\
	for (pos = list_entry((head)->prev, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.prev, typeof(*pos), member))
356 | |||
357 | /** | ||
358 | * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue | ||
359 | * @pos: the type * to use as a start point | ||
360 | * @head: the head of the list | ||
361 | * @member: the name of the list_struct within the struct. | ||
362 | * | ||
363 | * Prepares a pos entry for use as a start point in list_for_each_entry_continue. | ||
364 | */ | ||
#define list_prepare_entry(pos, head, member)			\
	((pos) ? : list_entry(head, typeof(*pos), member))
367 | |||
368 | /** | ||
369 | * list_for_each_entry_continue - continue iteration over list of given type | ||
370 | * @pos: the type * to use as a loop cursor. | ||
371 | * @head: the head for your list. | ||
372 | * @member: the name of the list_struct within the struct. | ||
373 | * | ||
374 | * Continue to iterate over list of given type, continuing after | ||
375 | * the current position. | ||
376 | */ | ||
#define list_for_each_entry_continue(pos, head, member)			\
	for (pos = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = list_entry(pos->member.next, typeof(*pos), member))
381 | |||
/**
 * list_for_each_entry_from - walk a typed list starting at the cursor itself
 * @pos:	cursor; a pointer of the entry type, must already be set.
 * @head:	pointer to the list head being traversed.
 * @member:	name of the list link field embedded in the entry struct.
 *
 * Unlike list_for_each_entry_continue there is no initial step: the entry
 * @pos points at on entry is the first one visited.
 */
#define list_for_each_entry_from(pos, head, member)			\
	for (; &pos->member != (head);					\
	     pos = list_entry(pos->member.next, typeof(*pos), member))
393 | |||
/**
 * list_for_each_entry_safe - walk a typed list; the body may remove @pos
 * @pos:	cursor; a pointer of the entry type.
 * @n:		second pointer of the entry type, holds the following entry.
 * @head:	pointer to the list head being traversed.
 * @member:	name of the list link field embedded in the entry struct.
 *
 * The successor is fetched into @n before the body runs, so the step
 * expression never reads @pos again.
 */
#define list_for_each_entry_safe(pos, n, head, member)			\
	for (pos = list_entry((head)->next, typeof(*pos), member),	\
	     n = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
406 | |||
/**
 * list_for_each_entry_safe_continue - resume after the cursor, removal-safe
 * @pos:	cursor; a pointer of the entry type, must already be set.
 * @n:		second pointer of the entry type, holds the following entry.
 * @head:	pointer to the list head being traversed.
 * @member:	name of the list link field embedded in the entry struct.
 *
 * The first entry visited is the one after @pos.  The successor is kept in
 * @n, so the body may remove the current entry from the list.
 */
#define list_for_each_entry_safe_continue(pos, n, head, member)		\
	for (pos = list_entry(pos->member.next, typeof(*pos), member),	\
	     n = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
422 | |||
/**
 * list_for_each_entry_safe_from - walk from the cursor itself, removal-safe
 * @pos:	cursor; a pointer of the entry type, must already be set.
 * @n:		second pointer of the entry type, holds the following entry.
 * @head:	pointer to the list head being traversed.
 * @member:	name of the list link field embedded in the entry struct.
 *
 * The entry @pos points at on entry is the first one visited.  The
 * successor is kept in @n, so the body may remove the current entry.
 */
#define list_for_each_entry_safe_from(pos, n, head, member)		\
	for (n = list_entry(pos->member.next, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.next, typeof(*n), member))
437 | |||
/**
 * list_for_each_entry_safe_reverse - walk tail to head, removal-safe
 * @pos:	cursor; a pointer of the entry type.
 * @n:		second pointer of the entry type, holds the preceding entry.
 * @head:	pointer to the list head being traversed.
 * @member:	name of the list link field embedded in the entry struct.
 *
 * Follows the .prev links.  The predecessor is kept in @n, so the body may
 * remove the current entry from the list.
 */
#define list_for_each_entry_safe_reverse(pos, n, head, member)		\
	for (pos = list_entry((head)->prev, typeof(*pos), member),	\
	     n = list_entry(pos->member.prev, typeof(*pos), member);	\
	     &pos->member != (head);					\
	     pos = n, n = list_entry(n->member.prev, typeof(*n), member))
453 | |||
/*
 * Doubly linked lists whose head is a single pointer.
 * Mostly useful for hash tables, where a two-pointer list head per
 * bucket is too wasteful.
 * The price is that the tail can no longer be reached in O(1).
 */

struct hlist_head {
	struct hlist_node *first;	/* first node, NULL when the list is empty */
};
464 | |||
struct hlist_node {
	struct hlist_node *next;	/* following node, NULL at the end */
	struct hlist_node **pprev;	/* address of the pointer that points at this node */
};
468 | |||
/* Static initializer for an empty hlist head. */
#define HLIST_HEAD_INIT		{ .first = NULL }
/* Define a struct hlist_head variable called @name, initially empty. */
#define HLIST_HEAD(name)	struct hlist_head name = { .first = NULL }
/* Reset the head @ptr points at to the empty state. */
#define INIT_HLIST_HEAD(ptr)	((ptr)->first = NULL)
472 | static inline void INIT_HLIST_NODE(struct hlist_node *h) | ||
473 | { | ||
474 | h->next = NULL; | ||
475 | h->pprev = NULL; | ||
476 | } | ||
477 | |||
478 | static inline int hlist_unhashed(const struct hlist_node *h) | ||
479 | { | ||
480 | return !h->pprev; | ||
481 | } | ||
482 | |||
483 | static inline int hlist_empty(const struct hlist_head *h) | ||
484 | { | ||
485 | return !h->first; | ||
486 | } | ||
487 | |||
488 | static inline void __hlist_del(struct hlist_node *n) | ||
489 | { | ||
490 | struct hlist_node *next = n->next; | ||
491 | struct hlist_node **pprev = n->pprev; | ||
492 | *pprev = next; | ||
493 | if (next) | ||
494 | next->pprev = pprev; | ||
495 | } | ||
496 | |||
497 | static inline void hlist_del(struct hlist_node *n) | ||
498 | { | ||
499 | __hlist_del(n); | ||
500 | n->next = LIST_POISON1; | ||
501 | n->pprev = LIST_POISON2; | ||
502 | } | ||
503 | |||
/*
 * Unlink @n if it is currently linked and leave it reinitialized
 * (both links NULL).  A node that is already unhashed is left alone.
 */
static inline void hlist_del_init(struct hlist_node *n)
{
	if (hlist_unhashed(n))
		return;

	__hlist_del(n);
	INIT_HLIST_NODE(n);
}
511 | |||
512 | static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) | ||
513 | { | ||
514 | struct hlist_node *first = h->first; | ||
515 | n->next = first; | ||
516 | if (first) | ||
517 | first->pprev = &n->next; | ||
518 | h->first = n; | ||
519 | n->pprev = &h->first; | ||
520 | } | ||
521 | |||
522 | /* next must be != NULL */ | ||
523 | static inline void hlist_add_before(struct hlist_node *n, | ||
524 | struct hlist_node *next) | ||
525 | { | ||
526 | n->pprev = next->pprev; | ||
527 | n->next = next; | ||
528 | next->pprev = &n->next; | ||
529 | *(n->pprev) = n; | ||
530 | } | ||
531 | |||
532 | static inline void hlist_add_after(struct hlist_node *n, | ||
533 | struct hlist_node *next) | ||
534 | { | ||
535 | next->next = n->next; | ||
536 | n->next = next; | ||
537 | next->pprev = &n->next; | ||
538 | |||
539 | if(next->next) | ||
540 | next->next->pprev = &next->next; | ||
541 | } | ||
542 | |||
/* Map a pointer to an embedded hlist_node back to its containing struct. */
#define hlist_entry(ptr, type, member)	container_of(ptr, type, member)
544 | |||
/* Visit every node of the hlist at @head; @pos is the node cursor. */
#define hlist_for_each(pos, head)					\
	for (pos = (head)->first;					\
	     pos;							\
	     pos = pos->next)
548 | |||
/*
 * Visit every node of the hlist at @head.  The successor is saved in @n
 * before the body runs, so the body may unlink or clobber @pos.
 */
#define hlist_for_each_safe(pos, n, head)				\
	for (pos = (head)->first;					\
	     pos && ({ n = pos->next; 1; });				\
	     pos = n)
552 | |||
/**
 * hlist_for_each_entry - walk the typed entries of an hlist
 * @tpos:	entry cursor; a pointer of the containing struct type.
 * @pos:	node cursor; a &struct hlist_node pointer.
 * @head:	pointer to the hlist head being traversed.
 * @member:	name of the hlist_node field inside the containing struct.
 *
 * @tpos is recomputed from @pos before every iteration of the body.
 */
#define hlist_for_each_entry(tpos, pos, head, member)			 \
	for (pos = (head)->first;					 \
	     pos &&							 \
		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
	     pos = pos->next)
565 | |||
/**
 * hlist_for_each_entry_continue - resume an hlist walk after the cursor
 * @tpos:	entry cursor; a pointer of the containing struct type.
 * @pos:	node cursor; a &struct hlist_node pointer, must already be set.
 * @member:	name of the hlist_node field inside the containing struct.
 *
 * The node @pos points at on entry is skipped; the first entry visited is
 * the one that follows it.
 */
#define hlist_for_each_entry_continue(tpos, pos, member)		 \
	for (pos = (pos)->next;						 \
	     pos &&							 \
		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
	     pos = pos->next)
577 | |||
/**
 * hlist_for_each_entry_from - walk an hlist starting at the cursor itself
 * @tpos:	entry cursor; a pointer of the containing struct type.
 * @pos:	node cursor; a &struct hlist_node pointer, must already be set.
 * @member:	name of the hlist_node field inside the containing struct.
 *
 * The node @pos points at on entry is the first one visited.
 */
#define hlist_for_each_entry_from(tpos, pos, member)			 \
	for (; pos &&							 \
		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
	     pos = pos->next)
588 | |||
/**
 * hlist_for_each_entry_safe - walk typed hlist entries; body may remove them
 * @tpos:	entry cursor; a pointer of the containing struct type.
 * @pos:	node cursor; a &struct hlist_node pointer.
 * @n:		second &struct hlist_node pointer, holds the following node.
 * @head:	pointer to the hlist head being traversed.
 * @member:	name of the hlist_node field inside the containing struct.
 *
 * The successor is saved in @n before the body runs, so the step
 * expression never reads @pos again.
 */
#define hlist_for_each_entry_safe(tpos, pos, n, head, member)		 \
	for (pos = (head)->first;					 \
	     pos && ({ n = pos->next; 1; }) &&				 \
		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
	     pos = n)
602 | |||
603 | #endif | ||
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c new file mode 100644 index 000000000000..a28bccae5458 --- /dev/null +++ b/tools/perf/util/pager.c | |||
@@ -0,0 +1,99 @@ | |||
1 | #include "cache.h" | ||
2 | #include "run-command.h" | ||
3 | #include "sigchain.h" | ||
4 | |||
5 | /* | ||
6 | * This is split up from the rest of git so that we can do | ||
7 | * something different on Windows. | ||
8 | */ | ||
9 | |||
10 | static int spawned_pager; | ||
11 | |||
#ifndef __MINGW32__
/*
 * Installed as pager_process.preexec_cb by setup_pager().
 * Blocks in select() until fd 0 is readable or reports an exceptional
 * condition, then supplies a default value for LESS.
 */
static void pager_preexec(void)
{
	fd_set fds;

	/*
	 * Work around bug in "less" by not starting it until we
	 * have real input
	 */
	FD_ZERO(&fds);
	FD_SET(0, &fds);
	select(1, &fds, NULL, &fds, NULL);

	/* overwrite flag is 0: a LESS value already in the environment wins */
	setenv("LESS", "FRSX", 0);
}
#endif
28 | |||
29 | static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; | ||
30 | static struct child_process pager_process; | ||
31 | |||
32 | static void wait_for_pager(void) | ||
33 | { | ||
34 | fflush(stdout); | ||
35 | fflush(stderr); | ||
36 | /* signal EOF to pager */ | ||
37 | close(1); | ||
38 | close(2); | ||
39 | finish_command(&pager_process); | ||
40 | } | ||
41 | |||
/*
 * Signal handler registered through sigchain_push_common(): wait for the
 * pager first, then pop this handler and re-raise @signo.
 */
static void wait_for_pager_signal(int signo)
{
	wait_for_pager();

	sigchain_pop(signo);
	raise(signo);
}
48 | |||
49 | void setup_pager(void) | ||
50 | { | ||
51 | const char *pager = getenv("PERF_PAGER"); | ||
52 | |||
53 | if (!isatty(1)) | ||
54 | return; | ||
55 | if (!pager) { | ||
56 | if (!pager_program) | ||
57 | perf_config(perf_default_config, NULL); | ||
58 | pager = pager_program; | ||
59 | } | ||
60 | if (!pager) | ||
61 | pager = getenv("PAGER"); | ||
62 | if (!pager) | ||
63 | pager = "less"; | ||
64 | else if (!*pager || !strcmp(pager, "cat")) | ||
65 | return; | ||
66 | |||
67 | spawned_pager = 1; /* means we are emitting to terminal */ | ||
68 | |||
69 | /* spawn the pager */ | ||
70 | pager_argv[2] = pager; | ||
71 | pager_process.argv = pager_argv; | ||
72 | pager_process.in = -1; | ||
73 | #ifndef __MINGW32__ | ||
74 | pager_process.preexec_cb = pager_preexec; | ||
75 | #endif | ||
76 | if (start_command(&pager_process)) | ||
77 | return; | ||
78 | |||
79 | /* original process continues, but writes to the pipe */ | ||
80 | dup2(pager_process.in, 1); | ||
81 | if (isatty(2)) | ||
82 | dup2(pager_process.in, 2); | ||
83 | close(pager_process.in); | ||
84 | |||
85 | /* this makes sure that the parent terminates after the pager */ | ||
86 | sigchain_push_common(wait_for_pager_signal); | ||
87 | atexit(wait_for_pager); | ||
88 | } | ||
89 | |||
90 | int pager_in_use(void) | ||
91 | { | ||
92 | const char *env; | ||
93 | |||
94 | if (spawned_pager) | ||
95 | return 1; | ||
96 | |||
97 | env = getenv("PERF_PAGER_IN_USE"); | ||
98 | return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0; | ||
99 | } | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c new file mode 100644 index 000000000000..9d5f1ca50e6f --- /dev/null +++ b/tools/perf/util/parse-events.c | |||
@@ -0,0 +1,316 @@ | |||
1 | |||
2 | #include "../perf.h" | ||
3 | #include "util.h" | ||
4 | #include "parse-options.h" | ||
5 | #include "parse-events.h" | ||
6 | #include "exec_cmd.h" | ||
7 | #include "string.h" | ||
8 | |||
9 | extern char *strcasestr(const char *haystack, const char *needle); | ||
10 | |||
11 | int nr_counters; | ||
12 | |||
13 | struct perf_counter_attr attrs[MAX_COUNTERS]; | ||
14 | |||
15 | struct event_symbol { | ||
16 | __u8 type; | ||
17 | __u64 config; | ||
18 | char *symbol; | ||
19 | }; | ||
20 | |||
21 | #define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y | ||
22 | #define CR(x, y) .type = PERF_TYPE_##x, .config = y | ||
23 | |||
24 | static struct event_symbol event_symbols[] = { | ||
25 | { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, | ||
26 | { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, | ||
27 | { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, | ||
28 | { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, | ||
29 | { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, | ||
30 | { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, | ||
31 | { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, | ||
32 | { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, | ||
33 | { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, | ||
34 | |||
35 | { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, | ||
36 | { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, | ||
37 | { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, | ||
38 | { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, | ||
39 | { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, | ||
40 | { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, | ||
41 | { C(SOFTWARE, SW_CONTEXT_SWITCHES), "context-switches", }, | ||
42 | { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, | ||
43 | { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, | ||
44 | { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, | ||
45 | }; | ||
46 | |||
/*
 * Extract one bit field from a counter config word: mask with
 * PERF_COUNTER_<name>_MASK, then shift down by PERF_COUNTER_<name>_SHIFT.
 * The argument is parenthesized so that expressions such as `a | b` are
 * masked as a whole.
 */
#define __PERF_COUNTER_FIELD(config, name) \
	(((config) & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)

#define PERF_COUNTER_RAW(config)	__PERF_COUNTER_FIELD(config, RAW)
#define PERF_COUNTER_CONFIG(config)	__PERF_COUNTER_FIELD(config, CONFIG)
#define PERF_COUNTER_TYPE(config)	__PERF_COUNTER_FIELD(config, TYPE)
#define PERF_COUNTER_ID(config)		__PERF_COUNTER_FIELD(config, EVENT)
54 | |||
/*
 * Display names for PERF_TYPE_HARDWARE counters; event_name() indexes this
 * table directly with the counter's config value.
 */
static char *hw_event_names[] = {
	"cycles",		"instructions",
	"cache-references",	"cache-misses",
	"branches",		"branch-misses",
	"bus-cycles",
};
64 | |||
/*
 * Display names for PERF_TYPE_SOFTWARE counters; event_name() indexes this
 * table directly with the counter's config value.
 */
static char *sw_event_names[] = {
	"cpu-clock-ticks",	"task-clock-ticks",
	"page-faults",		"context-switches",
	"CPU-migrations",	"minor-faults",
	"major-faults",
};
74 | |||
/* Upper bound on the number of spellings stored per alias-table row. */
#define MAX_ALIASES 8

/*
 * Cache type aliases, one row per cache type.  Column 0 is the spelling
 * event_name() prints; unused trailing slots are NULL.
 */
static char *hw_cache[][MAX_ALIASES] = {
	{ "L1-data",		"l1-d",	"l1d",	"l1"	},
	{ "L1-instruction",	"l1-i",	"l1i"		},
	{ "L2",			"l2"			},
	{ "Data-TLB",		"dtlb",	"d-tlb"		},
	{ "Instruction-TLB",	"itlb",	"i-tlb"		},
	{ "Branch",		"bpu",	"btb",	"bpc"	},
};
85 | |||
86 | static char *hw_cache_op [][MAX_ALIASES] = { | ||
87 | { "Load" , "read" }, | ||
88 | { "Store" , "write" }, | ||
89 | { "Prefetch" , "speculative-read", "speculative-load" }, | ||
90 | }; | ||
91 | |||
92 | static char *hw_cache_result [][MAX_ALIASES] = { | ||
93 | { "Reference" , "ops", "access" }, | ||
94 | { "Miss" }, | ||
95 | }; | ||
96 | |||
97 | char *event_name(int counter) | ||
98 | { | ||
99 | __u64 config = attrs[counter].config; | ||
100 | int type = attrs[counter].type; | ||
101 | static char buf[32]; | ||
102 | |||
103 | if (attrs[counter].type == PERF_TYPE_RAW) { | ||
104 | sprintf(buf, "raw 0x%llx", config); | ||
105 | return buf; | ||
106 | } | ||
107 | |||
108 | switch (type) { | ||
109 | case PERF_TYPE_HARDWARE: | ||
110 | if (config < PERF_COUNT_HW_MAX) | ||
111 | return hw_event_names[config]; | ||
112 | return "unknown-hardware"; | ||
113 | |||
114 | case PERF_TYPE_HW_CACHE: { | ||
115 | __u8 cache_type, cache_op, cache_result; | ||
116 | static char name[100]; | ||
117 | |||
118 | cache_type = (config >> 0) & 0xff; | ||
119 | if (cache_type > PERF_COUNT_HW_CACHE_MAX) | ||
120 | return "unknown-ext-hardware-cache-type"; | ||
121 | |||
122 | cache_op = (config >> 8) & 0xff; | ||
123 | if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX) | ||
124 | return "unknown-ext-hardware-cache-op"; | ||
125 | |||
126 | cache_result = (config >> 16) & 0xff; | ||
127 | if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
128 | return "unknown-ext-hardware-cache-result"; | ||
129 | |||
130 | sprintf(name, "%s-Cache-%s-%ses", | ||
131 | hw_cache[cache_type][0], | ||
132 | hw_cache_op[cache_op][0], | ||
133 | hw_cache_result[cache_result][0]); | ||
134 | |||
135 | return name; | ||
136 | } | ||
137 | |||
138 | case PERF_TYPE_SOFTWARE: | ||
139 | if (config < PERF_COUNT_SW_MAX) | ||
140 | return sw_event_names[config]; | ||
141 | return "unknown-software"; | ||
142 | |||
143 | default: | ||
144 | break; | ||
145 | } | ||
146 | |||
147 | return "unknown"; | ||
148 | } | ||
149 | |||
150 | static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | for (i = 0; i < size; i++) { | ||
155 | for (j = 0; j < MAX_ALIASES; j++) { | ||
156 | if (!names[i][j]) | ||
157 | break; | ||
158 | if (strcasestr(str, names[i][j])) | ||
159 | return i; | ||
160 | } | ||
161 | } | ||
162 | |||
163 | return -1; | ||
164 | } | ||
165 | |||
166 | static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) | ||
167 | { | ||
168 | int cache_type = -1, cache_op = 0, cache_result = 0; | ||
169 | |||
170 | cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); | ||
171 | /* | ||
172 | * No fallback - if we cannot get a clear cache type | ||
173 | * then bail out: | ||
174 | */ | ||
175 | if (cache_type == -1) | ||
176 | return -EINVAL; | ||
177 | |||
178 | cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); | ||
179 | /* | ||
180 | * Fall back to reads: | ||
181 | */ | ||
182 | if (cache_op == -1) | ||
183 | cache_op = PERF_COUNT_HW_CACHE_OP_READ; | ||
184 | |||
185 | cache_result = parse_aliases(str, hw_cache_result, | ||
186 | PERF_COUNT_HW_CACHE_RESULT_MAX); | ||
187 | /* | ||
188 | * Fall back to accesses: | ||
189 | */ | ||
190 | if (cache_result == -1) | ||
191 | cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; | ||
192 | |||
193 | attr->config = cache_type | (cache_op << 8) | (cache_result << 16); | ||
194 | attr->type = PERF_TYPE_HW_CACHE; | ||
195 | |||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Each event can have multiple symbolic names. | ||
201 | * Symbolic names are (almost) exactly matched. | ||
202 | */ | ||
203 | static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) | ||
204 | { | ||
205 | __u64 config, id; | ||
206 | int type; | ||
207 | unsigned int i; | ||
208 | const char *sep, *pstr; | ||
209 | |||
210 | if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { | ||
211 | attr->type = PERF_TYPE_RAW; | ||
212 | attr->config = config; | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | pstr = str; | ||
218 | sep = strchr(pstr, ':'); | ||
219 | if (sep) { | ||
220 | type = atoi(pstr); | ||
221 | pstr = sep + 1; | ||
222 | id = atoi(pstr); | ||
223 | sep = strchr(pstr, ':'); | ||
224 | if (sep) { | ||
225 | pstr = sep + 1; | ||
226 | if (strchr(pstr, 'k')) | ||
227 | attr->exclude_user = 1; | ||
228 | if (strchr(pstr, 'u')) | ||
229 | attr->exclude_kernel = 1; | ||
230 | } | ||
231 | attr->type = type; | ||
232 | attr->config = id; | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | |||
237 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { | ||
238 | if (!strncmp(str, event_symbols[i].symbol, | ||
239 | strlen(event_symbols[i].symbol))) { | ||
240 | |||
241 | attr->type = event_symbols[i].type; | ||
242 | attr->config = event_symbols[i].config; | ||
243 | |||
244 | return 0; | ||
245 | } | ||
246 | } | ||
247 | |||
248 | return parse_generic_hw_symbols(str, attr); | ||
249 | } | ||
250 | |||
251 | int parse_events(const struct option *opt, const char *str, int unset) | ||
252 | { | ||
253 | struct perf_counter_attr attr; | ||
254 | int ret; | ||
255 | |||
256 | memset(&attr, 0, sizeof(attr)); | ||
257 | again: | ||
258 | if (nr_counters == MAX_COUNTERS) | ||
259 | return -1; | ||
260 | |||
261 | ret = parse_event_symbols(str, &attr); | ||
262 | if (ret < 0) | ||
263 | return ret; | ||
264 | |||
265 | attrs[nr_counters] = attr; | ||
266 | nr_counters++; | ||
267 | |||
268 | str = strstr(str, ","); | ||
269 | if (str) { | ||
270 | str++; | ||
271 | goto again; | ||
272 | } | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
/*
 * Labels printed by print_events(), indexed by event type + 1.
 * Slot 0 is the empty label used for types without an entry.
 */
static const char * const event_type_descriptors[] = {
	"",			/* fallback */
	"Hardware event",
	"Software event",
	"Tracepoint event",
	"Hardware cache event",
};
284 | |||
285 | /* | ||
286 | * Print the help text for the event symbols: | ||
287 | */ | ||
288 | void print_events(void) | ||
289 | { | ||
290 | struct event_symbol *syms = event_symbols; | ||
291 | unsigned int i, type, prev_type = -1; | ||
292 | |||
293 | fprintf(stderr, "\n"); | ||
294 | fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); | ||
295 | |||
296 | for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { | ||
297 | type = syms->type + 1; | ||
298 | if (type > ARRAY_SIZE(event_type_descriptors)) | ||
299 | type = 0; | ||
300 | |||
301 | if (type != prev_type) | ||
302 | fprintf(stderr, "\n"); | ||
303 | |||
304 | fprintf(stderr, " %-30s [%s]\n", syms->symbol, | ||
305 | event_type_descriptors[type]); | ||
306 | |||
307 | prev_type = type; | ||
308 | } | ||
309 | |||
310 | fprintf(stderr, "\n"); | ||
311 | fprintf(stderr, " %-30s [raw hardware event descriptor]\n", | ||
312 | "rNNN"); | ||
313 | fprintf(stderr, "\n"); | ||
314 | |||
315 | exit(129); | ||
316 | } | ||
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h new file mode 100644 index 000000000000..e3d552908e60 --- /dev/null +++ b/tools/perf/util/parse-events.h | |||
@@ -0,0 +1,17 @@ | |||
1 | |||
2 | /* | ||
3 | * Parse symbolic events/counts passed in as options: | ||
4 | */ | ||
5 | |||
6 | extern int nr_counters; | ||
7 | |||
8 | extern struct perf_counter_attr attrs[MAX_COUNTERS]; | ||
9 | |||
10 | extern char *event_name(int ctr); | ||
11 | |||
12 | extern int parse_events(const struct option *opt, const char *str, int unset); | ||
13 | |||
14 | #define EVENTS_HELP_MAX (128*1024) | ||
15 | |||
16 | extern void print_events(void); | ||
17 | |||
diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c new file mode 100644 index 000000000000..b3affb1658d2 --- /dev/null +++ b/tools/perf/util/parse-options.c | |||
@@ -0,0 +1,508 @@ | |||
1 | #include "util.h" | ||
2 | #include "parse-options.h" | ||
3 | #include "cache.h" | ||
4 | |||
/* Bits of the internal `flags` argument passed between the parsers. */
#define OPT_SHORT	1	/* option was given in its short (-x) form */
#define OPT_UNSET	2	/* option was given in its negated form */
7 | |||
8 | static int opterror(const struct option *opt, const char *reason, int flags) | ||
9 | { | ||
10 | if (flags & OPT_SHORT) | ||
11 | return error("switch `%c' %s", opt->short_name, reason); | ||
12 | if (flags & OPT_UNSET) | ||
13 | return error("option `no-%s' %s", opt->long_name, reason); | ||
14 | return error("option `%s' %s", opt->long_name, reason); | ||
15 | } | ||
16 | |||
17 | static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, | ||
18 | int flags, const char **arg) | ||
19 | { | ||
20 | if (p->opt) { | ||
21 | *arg = p->opt; | ||
22 | p->opt = NULL; | ||
23 | } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { | ||
24 | *arg = (const char *)opt->defval; | ||
25 | } else if (p->argc > 1) { | ||
26 | p->argc--; | ||
27 | *arg = *++p->argv; | ||
28 | } else | ||
29 | return opterror(opt, "requires a value", flags); | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | static int get_value(struct parse_opt_ctx_t *p, | ||
34 | const struct option *opt, int flags) | ||
35 | { | ||
36 | const char *s, *arg = NULL; | ||
37 | const int unset = flags & OPT_UNSET; | ||
38 | |||
39 | if (unset && p->opt) | ||
40 | return opterror(opt, "takes no value", flags); | ||
41 | if (unset && (opt->flags & PARSE_OPT_NONEG)) | ||
42 | return opterror(opt, "isn't available", flags); | ||
43 | |||
44 | if (!(flags & OPT_SHORT) && p->opt) { | ||
45 | switch (opt->type) { | ||
46 | case OPTION_CALLBACK: | ||
47 | if (!(opt->flags & PARSE_OPT_NOARG)) | ||
48 | break; | ||
49 | /* FALLTHROUGH */ | ||
50 | case OPTION_BOOLEAN: | ||
51 | case OPTION_BIT: | ||
52 | case OPTION_SET_INT: | ||
53 | case OPTION_SET_PTR: | ||
54 | return opterror(opt, "takes no value", flags); | ||
55 | default: | ||
56 | break; | ||
57 | } | ||
58 | } | ||
59 | |||
60 | switch (opt->type) { | ||
61 | case OPTION_BIT: | ||
62 | if (unset) | ||
63 | *(int *)opt->value &= ~opt->defval; | ||
64 | else | ||
65 | *(int *)opt->value |= opt->defval; | ||
66 | return 0; | ||
67 | |||
68 | case OPTION_BOOLEAN: | ||
69 | *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; | ||
70 | return 0; | ||
71 | |||
72 | case OPTION_SET_INT: | ||
73 | *(int *)opt->value = unset ? 0 : opt->defval; | ||
74 | return 0; | ||
75 | |||
76 | case OPTION_SET_PTR: | ||
77 | *(void **)opt->value = unset ? NULL : (void *)opt->defval; | ||
78 | return 0; | ||
79 | |||
80 | case OPTION_STRING: | ||
81 | if (unset) | ||
82 | *(const char **)opt->value = NULL; | ||
83 | else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) | ||
84 | *(const char **)opt->value = (const char *)opt->defval; | ||
85 | else | ||
86 | return get_arg(p, opt, flags, (const char **)opt->value); | ||
87 | return 0; | ||
88 | |||
89 | case OPTION_CALLBACK: | ||
90 | if (unset) | ||
91 | return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; | ||
92 | if (opt->flags & PARSE_OPT_NOARG) | ||
93 | return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; | ||
94 | if (opt->flags & PARSE_OPT_OPTARG && !p->opt) | ||
95 | return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; | ||
96 | if (get_arg(p, opt, flags, &arg)) | ||
97 | return -1; | ||
98 | return (*opt->callback)(opt, arg, 0) ? (-1) : 0; | ||
99 | |||
100 | case OPTION_INTEGER: | ||
101 | if (unset) { | ||
102 | *(int *)opt->value = 0; | ||
103 | return 0; | ||
104 | } | ||
105 | if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { | ||
106 | *(int *)opt->value = opt->defval; | ||
107 | return 0; | ||
108 | } | ||
109 | if (get_arg(p, opt, flags, &arg)) | ||
110 | return -1; | ||
111 | *(int *)opt->value = strtol(arg, (char **)&s, 10); | ||
112 | if (*s) | ||
113 | return opterror(opt, "expects a numerical value", flags); | ||
114 | return 0; | ||
115 | |||
116 | case OPTION_LONG: | ||
117 | if (unset) { | ||
118 | *(long *)opt->value = 0; | ||
119 | return 0; | ||
120 | } | ||
121 | if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { | ||
122 | *(long *)opt->value = opt->defval; | ||
123 | return 0; | ||
124 | } | ||
125 | if (get_arg(p, opt, flags, &arg)) | ||
126 | return -1; | ||
127 | *(long *)opt->value = strtol(arg, (char **)&s, 10); | ||
128 | if (*s) | ||
129 | return opterror(opt, "expects a numerical value", flags); | ||
130 | return 0; | ||
131 | |||
132 | default: | ||
133 | die("should not happen, someone must be hit on the forehead"); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) | ||
138 | { | ||
139 | for (; options->type != OPTION_END; options++) { | ||
140 | if (options->short_name == *p->opt) { | ||
141 | p->opt = p->opt[1] ? p->opt + 1 : NULL; | ||
142 | return get_value(p, options, OPT_SHORT); | ||
143 | } | ||
144 | } | ||
145 | return -2; | ||
146 | } | ||
147 | |||
148 | static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, | ||
149 | const struct option *options) | ||
150 | { | ||
151 | const char *arg_end = strchr(arg, '='); | ||
152 | const struct option *abbrev_option = NULL, *ambiguous_option = NULL; | ||
153 | int abbrev_flags = 0, ambiguous_flags = 0; | ||
154 | |||
155 | if (!arg_end) | ||
156 | arg_end = arg + strlen(arg); | ||
157 | |||
158 | for (; options->type != OPTION_END; options++) { | ||
159 | const char *rest; | ||
160 | int flags = 0; | ||
161 | |||
162 | if (!options->long_name) | ||
163 | continue; | ||
164 | |||
165 | rest = skip_prefix(arg, options->long_name); | ||
166 | if (options->type == OPTION_ARGUMENT) { | ||
167 | if (!rest) | ||
168 | continue; | ||
169 | if (*rest == '=') | ||
170 | return opterror(options, "takes no value", flags); | ||
171 | if (*rest) | ||
172 | continue; | ||
173 | p->out[p->cpidx++] = arg - 2; | ||
174 | return 0; | ||
175 | } | ||
176 | if (!rest) { | ||
177 | /* abbreviated? */ | ||
178 | if (!strncmp(options->long_name, arg, arg_end - arg)) { | ||
179 | is_abbreviated: | ||
180 | if (abbrev_option) { | ||
181 | /* | ||
182 | * If this is abbreviated, it is | ||
183 | * ambiguous. So when there is no | ||
184 | * exact match later, we need to | ||
185 | * error out. | ||
186 | */ | ||
187 | ambiguous_option = abbrev_option; | ||
188 | ambiguous_flags = abbrev_flags; | ||
189 | } | ||
190 | if (!(flags & OPT_UNSET) && *arg_end) | ||
191 | p->opt = arg_end + 1; | ||
192 | abbrev_option = options; | ||
193 | abbrev_flags = flags; | ||
194 | continue; | ||
195 | } | ||
196 | /* negated and abbreviated very much? */ | ||
197 | if (!prefixcmp("no-", arg)) { | ||
198 | flags |= OPT_UNSET; | ||
199 | goto is_abbreviated; | ||
200 | } | ||
201 | /* negated? */ | ||
202 | if (strncmp(arg, "no-", 3)) | ||
203 | continue; | ||
204 | flags |= OPT_UNSET; | ||
205 | rest = skip_prefix(arg + 3, options->long_name); | ||
206 | /* abbreviated and negated? */ | ||
207 | if (!rest && !prefixcmp(options->long_name, arg + 3)) | ||
208 | goto is_abbreviated; | ||
209 | if (!rest) | ||
210 | continue; | ||
211 | } | ||
212 | if (*rest) { | ||
213 | if (*rest != '=') | ||
214 | continue; | ||
215 | p->opt = rest + 1; | ||
216 | } | ||
217 | return get_value(p, options, flags); | ||
218 | } | ||
219 | |||
220 | if (ambiguous_option) | ||
221 | return error("Ambiguous option: %s " | ||
222 | "(could be --%s%s or --%s%s)", | ||
223 | arg, | ||
224 | (ambiguous_flags & OPT_UNSET) ? "no-" : "", | ||
225 | ambiguous_option->long_name, | ||
226 | (abbrev_flags & OPT_UNSET) ? "no-" : "", | ||
227 | abbrev_option->long_name); | ||
228 | if (abbrev_option) | ||
229 | return get_value(p, abbrev_option, abbrev_flags); | ||
230 | return -2; | ||
231 | } | ||
232 | |||
233 | static void check_typos(const char *arg, const struct option *options) | ||
234 | { | ||
235 | if (strlen(arg) < 3) | ||
236 | return; | ||
237 | |||
238 | if (!prefixcmp(arg, "no-")) { | ||
239 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
240 | exit(129); | ||
241 | } | ||
242 | |||
243 | for (; options->type != OPTION_END; options++) { | ||
244 | if (!options->long_name) | ||
245 | continue; | ||
246 | if (!prefixcmp(options->long_name, arg)) { | ||
247 | error ("did you mean `--%s` (with two dashes ?)", arg); | ||
248 | exit(129); | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | |||
253 | void parse_options_start(struct parse_opt_ctx_t *ctx, | ||
254 | int argc, const char **argv, int flags) | ||
255 | { | ||
256 | memset(ctx, 0, sizeof(*ctx)); | ||
257 | ctx->argc = argc - 1; | ||
258 | ctx->argv = argv + 1; | ||
259 | ctx->out = argv; | ||
260 | ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); | ||
261 | ctx->flags = flags; | ||
262 | if ((flags & PARSE_OPT_KEEP_UNKNOWN) && | ||
263 | (flags & PARSE_OPT_STOP_AT_NON_OPTION)) | ||
264 | die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); | ||
265 | } | ||
266 | |||
267 | static int usage_with_options_internal(const char * const *, | ||
268 | const struct option *, int); | ||
269 | |||
270 | int parse_options_step(struct parse_opt_ctx_t *ctx, | ||
271 | const struct option *options, | ||
272 | const char * const usagestr[]) | ||
273 | { | ||
274 | int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); | ||
275 | |||
276 | /* we must reset ->opt, unknown short option leave it dangling */ | ||
277 | ctx->opt = NULL; | ||
278 | |||
279 | for (; ctx->argc; ctx->argc--, ctx->argv++) { | ||
280 | const char *arg = ctx->argv[0]; | ||
281 | |||
282 | if (*arg != '-' || !arg[1]) { | ||
283 | if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) | ||
284 | break; | ||
285 | ctx->out[ctx->cpidx++] = ctx->argv[0]; | ||
286 | continue; | ||
287 | } | ||
288 | |||
289 | if (arg[1] != '-') { | ||
290 | ctx->opt = arg + 1; | ||
291 | if (internal_help && *ctx->opt == 'h') | ||
292 | return parse_options_usage(usagestr, options); | ||
293 | switch (parse_short_opt(ctx, options)) { | ||
294 | case -1: | ||
295 | return parse_options_usage(usagestr, options); | ||
296 | case -2: | ||
297 | goto unknown; | ||
298 | } | ||
299 | if (ctx->opt) | ||
300 | check_typos(arg + 1, options); | ||
301 | while (ctx->opt) { | ||
302 | if (internal_help && *ctx->opt == 'h') | ||
303 | return parse_options_usage(usagestr, options); | ||
304 | switch (parse_short_opt(ctx, options)) { | ||
305 | case -1: | ||
306 | return parse_options_usage(usagestr, options); | ||
307 | case -2: | ||
308 | /* fake a short option thing to hide the fact that we may have | ||
309 | * started to parse aggregated stuff | ||
310 | * | ||
311 | * This is leaky, too bad. | ||
312 | */ | ||
313 | ctx->argv[0] = strdup(ctx->opt - 1); | ||
314 | *(char *)ctx->argv[0] = '-'; | ||
315 | goto unknown; | ||
316 | } | ||
317 | } | ||
318 | continue; | ||
319 | } | ||
320 | |||
321 | if (!arg[2]) { /* "--" */ | ||
322 | if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { | ||
323 | ctx->argc--; | ||
324 | ctx->argv++; | ||
325 | } | ||
326 | break; | ||
327 | } | ||
328 | |||
329 | if (internal_help && !strcmp(arg + 2, "help-all")) | ||
330 | return usage_with_options_internal(usagestr, options, 1); | ||
331 | if (internal_help && !strcmp(arg + 2, "help")) | ||
332 | return parse_options_usage(usagestr, options); | ||
333 | switch (parse_long_opt(ctx, arg + 2, options)) { | ||
334 | case -1: | ||
335 | return parse_options_usage(usagestr, options); | ||
336 | case -2: | ||
337 | goto unknown; | ||
338 | } | ||
339 | continue; | ||
340 | unknown: | ||
341 | if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) | ||
342 | return PARSE_OPT_UNKNOWN; | ||
343 | ctx->out[ctx->cpidx++] = ctx->argv[0]; | ||
344 | ctx->opt = NULL; | ||
345 | } | ||
346 | return PARSE_OPT_DONE; | ||
347 | } | ||
348 | |||
349 | int parse_options_end(struct parse_opt_ctx_t *ctx) | ||
350 | { | ||
351 | memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); | ||
352 | ctx->out[ctx->cpidx + ctx->argc] = NULL; | ||
353 | return ctx->cpidx + ctx->argc; | ||
354 | } | ||
355 | |||
356 | int parse_options(int argc, const char **argv, const struct option *options, | ||
357 | const char * const usagestr[], int flags) | ||
358 | { | ||
359 | struct parse_opt_ctx_t ctx; | ||
360 | |||
361 | parse_options_start(&ctx, argc, argv, flags); | ||
362 | switch (parse_options_step(&ctx, options, usagestr)) { | ||
363 | case PARSE_OPT_HELP: | ||
364 | exit(129); | ||
365 | case PARSE_OPT_DONE: | ||
366 | break; | ||
367 | default: /* PARSE_OPT_UNKNOWN */ | ||
368 | if (ctx.argv[0][1] == '-') { | ||
369 | error("unknown option `%s'", ctx.argv[0] + 2); | ||
370 | } else { | ||
371 | error("unknown switch `%c'", *ctx.opt); | ||
372 | } | ||
373 | usage_with_options(usagestr, options); | ||
374 | } | ||
375 | |||
376 | return parse_options_end(&ctx); | ||
377 | } | ||
378 | |||
379 | #define USAGE_OPTS_WIDTH 24 | ||
380 | #define USAGE_GAP 2 | ||
381 | |||
382 | int usage_with_options_internal(const char * const *usagestr, | ||
383 | const struct option *opts, int full) | ||
384 | { | ||
385 | if (!usagestr) | ||
386 | return PARSE_OPT_HELP; | ||
387 | |||
388 | fprintf(stderr, "\n usage: %s\n", *usagestr++); | ||
389 | while (*usagestr && **usagestr) | ||
390 | fprintf(stderr, " or: %s\n", *usagestr++); | ||
391 | while (*usagestr) { | ||
392 | fprintf(stderr, "%s%s\n", | ||
393 | **usagestr ? " " : "", | ||
394 | *usagestr); | ||
395 | usagestr++; | ||
396 | } | ||
397 | |||
398 | if (opts->type != OPTION_GROUP) | ||
399 | fputc('\n', stderr); | ||
400 | |||
401 | for (; opts->type != OPTION_END; opts++) { | ||
402 | size_t pos; | ||
403 | int pad; | ||
404 | |||
405 | if (opts->type == OPTION_GROUP) { | ||
406 | fputc('\n', stderr); | ||
407 | if (*opts->help) | ||
408 | fprintf(stderr, "%s\n", opts->help); | ||
409 | continue; | ||
410 | } | ||
411 | if (!full && (opts->flags & PARSE_OPT_HIDDEN)) | ||
412 | continue; | ||
413 | |||
414 | pos = fprintf(stderr, " "); | ||
415 | if (opts->short_name) | ||
416 | pos += fprintf(stderr, "-%c", opts->short_name); | ||
417 | if (opts->long_name && opts->short_name) | ||
418 | pos += fprintf(stderr, ", "); | ||
419 | if (opts->long_name) | ||
420 | pos += fprintf(stderr, "--%s", opts->long_name); | ||
421 | |||
422 | switch (opts->type) { | ||
423 | case OPTION_ARGUMENT: | ||
424 | break; | ||
425 | case OPTION_INTEGER: | ||
426 | if (opts->flags & PARSE_OPT_OPTARG) | ||
427 | if (opts->long_name) | ||
428 | pos += fprintf(stderr, "[=<n>]"); | ||
429 | else | ||
430 | pos += fprintf(stderr, "[<n>]"); | ||
431 | else | ||
432 | pos += fprintf(stderr, " <n>"); | ||
433 | break; | ||
434 | case OPTION_CALLBACK: | ||
435 | if (opts->flags & PARSE_OPT_NOARG) | ||
436 | break; | ||
437 | /* FALLTHROUGH */ | ||
438 | case OPTION_STRING: | ||
439 | if (opts->argh) { | ||
440 | if (opts->flags & PARSE_OPT_OPTARG) | ||
441 | if (opts->long_name) | ||
442 | pos += fprintf(stderr, "[=<%s>]", opts->argh); | ||
443 | else | ||
444 | pos += fprintf(stderr, "[<%s>]", opts->argh); | ||
445 | else | ||
446 | pos += fprintf(stderr, " <%s>", opts->argh); | ||
447 | } else { | ||
448 | if (opts->flags & PARSE_OPT_OPTARG) | ||
449 | if (opts->long_name) | ||
450 | pos += fprintf(stderr, "[=...]"); | ||
451 | else | ||
452 | pos += fprintf(stderr, "[...]"); | ||
453 | else | ||
454 | pos += fprintf(stderr, " ..."); | ||
455 | } | ||
456 | break; | ||
457 | default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ | ||
458 | break; | ||
459 | } | ||
460 | |||
461 | if (pos <= USAGE_OPTS_WIDTH) | ||
462 | pad = USAGE_OPTS_WIDTH - pos; | ||
463 | else { | ||
464 | fputc('\n', stderr); | ||
465 | pad = USAGE_OPTS_WIDTH; | ||
466 | } | ||
467 | fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); | ||
468 | } | ||
469 | fputc('\n', stderr); | ||
470 | |||
471 | return PARSE_OPT_HELP; | ||
472 | } | ||
473 | |||
/*
 * Print the short usage (hidden options omitted) and terminate the
 * process with exit status 129.  Never returns.
 */
void usage_with_options(const char * const *usagestr,
			const struct option *opts)
{
	const int show_hidden = 0;

	(void)usage_with_options_internal(usagestr, opts, show_hidden);
	exit(129);
}
480 | |||
/*
 * Print the short usage (hidden options omitted) without exiting and
 * hand back whatever usage_with_options_internal() returns.
 */
int parse_options_usage(const char * const *usagestr,
			const struct option *opts)
{
	const int show_hidden = 0;

	return usage_with_options_internal(usagestr, opts, show_hidden);
}
486 | |||
487 | |||
488 | int parse_opt_verbosity_cb(const struct option *opt, const char *arg, | ||
489 | int unset) | ||
490 | { | ||
491 | int *target = opt->value; | ||
492 | |||
493 | if (unset) | ||
494 | /* --no-quiet, --no-verbose */ | ||
495 | *target = 0; | ||
496 | else if (opt->short_name == 'v') { | ||
497 | if (*target >= 0) | ||
498 | (*target)++; | ||
499 | else | ||
500 | *target = 1; | ||
501 | } else { | ||
502 | if (*target <= 0) | ||
503 | (*target)--; | ||
504 | else | ||
505 | *target = -1; | ||
506 | } | ||
507 | return 0; | ||
508 | } | ||
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h new file mode 100644 index 000000000000..a1039a6ce0eb --- /dev/null +++ b/tools/perf/util/parse-options.h | |||
@@ -0,0 +1,174 @@ | |||
1 | #ifndef PARSE_OPTIONS_H | ||
2 | #define PARSE_OPTIONS_H | ||
3 | |||
/*
 * Kind of an entry in an option table.  OPTION_END terminates the table;
 * OPTION_GROUP entries only carry a header line for the usage output.
 */
4 | enum parse_opt_type { | ||
5 | /* special types */ | ||
6 | OPTION_END, | ||
7 | OPTION_ARGUMENT, | ||
8 | OPTION_GROUP, | ||
9 | /* options with no arguments */ | ||
10 | OPTION_BIT, | ||
11 | OPTION_BOOLEAN, /* _INCR would have been a better name */ | ||
12 | OPTION_SET_INT, | ||
13 | OPTION_SET_PTR, | ||
14 | /* options with arguments (usually) */ | ||
15 | OPTION_STRING, | ||
16 | OPTION_INTEGER, | ||
17 | OPTION_LONG, | ||
18 | OPTION_CALLBACK, | ||
19 | }; | ||
20 | |||
/*
 * Parser-wide flags passed to parse_options() / parse_options_start():
 *   KEEP_DASHDASH      - do not skip over the "--" that ends option parsing
 *   STOP_AT_NON_OPTION - stop at the first argument that is not an option
 *   KEEP_ARGV0         - reserve output slot 0 instead of overwriting it
 *   KEEP_UNKNOWN       - pass unknown options through to the output instead
 *                        of reporting PARSE_OPT_UNKNOWN
 *   NO_INTERNAL_HELP   - do not handle -h, --help and --help-all internally
 * KEEP_UNKNOWN and STOP_AT_NON_OPTION cannot be combined.
 */
21 | enum parse_opt_flags { | ||
22 | PARSE_OPT_KEEP_DASHDASH = 1, | ||
23 | PARSE_OPT_STOP_AT_NON_OPTION = 2, | ||
24 | PARSE_OPT_KEEP_ARGV0 = 4, | ||
25 | PARSE_OPT_KEEP_UNKNOWN = 8, | ||
26 | PARSE_OPT_NO_INTERNAL_HELP = 16, | ||
27 | }; | ||
28 | |||
/*
 * Per-option flags, stored in struct option's `flags` member.
 * NOTE(review): PARSE_OPT_LASTARG_DEFAULT is not described in the
 * struct option documentation in this header -- confirm its meaning
 * against the option value parser before relying on it.
 */
29 | enum parse_opt_option_flags { | ||
30 | PARSE_OPT_OPTARG = 1, | ||
31 | PARSE_OPT_NOARG = 2, | ||
32 | PARSE_OPT_NONEG = 4, | ||
33 | PARSE_OPT_HIDDEN = 8, | ||
34 | PARSE_OPT_LASTARG_DEFAULT = 16, | ||
35 | }; | ||
36 | |||
37 | struct option; | ||
38 | typedef int parse_opt_cb(const struct option *, const char *arg, int unset); | ||
39 | |||
40 | /* | ||
41 | * `type`:: | ||
42 | * holds the type of the option, you must have an OPTION_END last in your | ||
43 | * array. | ||
44 | * | ||
45 | * `short_name`:: | ||
46 | * the character to use as a short option name, '\0' if none. | ||
47 | * | ||
48 | * `long_name`:: | ||
49 | * the long option name, without the leading dashes, NULL if none. | ||
50 | * | ||
51 | * `value`:: | ||
52 | * stores pointers to the values to be filled. | ||
53 | * | ||
54 | * `argh`:: | ||
55 | * token to explain the kind of argument this option wants. Keep it | ||
56 | * homogeneous across the repository. | ||
57 | * | ||
58 | * `help`:: | ||
59 | * the short help associated to what the option does. | ||
60 | * Must never be NULL (except for OPTION_END). | ||
61 | * OPTION_GROUP uses this pointer to store the group header. | ||
62 | * | ||
63 | * `flags`:: | ||
64 | * mask of parse_opt_option_flags. | ||
65 | * PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs) | ||
66 | * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs | ||
67 | * PARSE_OPT_NONEG: says that this option cannot be negated | ||
68 | * PARSE_OPT_HIDDEN: this option is skipped in the default usage, shown in | ||
69 | * the long one. | ||
70 | * | ||
71 | * `callback`:: | ||
72 | * pointer to the callback to use for OPTION_CALLBACK. | ||
73 | * | ||
74 | * `defval`:: | ||
75 | * default value to fill (*->value) with for PARSE_OPT_OPTARG. | ||
76 | * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in | ||
77 | * the value when met. | ||
78 | * CALLBACKS can use it like they want. | ||
79 | */ | ||
/*
 * One entry of an option table.  The OPT_* initializer macros fill the
 * members positionally, so the member order here is part of the contract.
 */
80 | struct option { | ||
81 | enum parse_opt_type type; | ||
82 | int short_name; | ||
83 | const char *long_name; | ||
84 | void *value; | ||
85 | const char *argh; | ||
86 | const char *help; | ||
87 | |||
88 | int flags; | ||
89 | parse_opt_cb *callback; | ||
90 | intptr_t defval; | ||
91 | }; | ||
92 | |||
/*
 * Positional initializers for struct option, in member order
 * (type, short_name, long_name, value, argh, help, flags, callback, defval).
 * Members that are not listed are zero-initialized.
 * Parameter letters: s = short name, l = long name, v = value pointer,
 * a = argument hint (argh), h = help text, f = callback,
 * b / i / p = bit mask / integer / pointer stored in defval.
 */
93 | #define OPT_END() { OPTION_END } | ||
94 | #define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } | ||
95 | #define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } | ||
96 | #define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } | ||
97 | #define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } | ||
98 | #define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } | ||
99 | #define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } | ||
100 | #define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } | ||
101 | #define OPT_LONG(s, l, v, h) { OPTION_LONG, (s), (l), (v), NULL, (h) } | ||
102 | #define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } | ||
103 | #define OPT_DATE(s, l, v, h) \ | ||
104 | { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ | ||
105 | parse_opt_approxidate_cb } | ||
106 | #define OPT_CALLBACK(s, l, v, a, h, f) \ | ||
107 | { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } | ||
108 | |||
109 | /* parse_options() will filter out the processed options and leave the | ||
110 | * non-option arguments in argv[]. | ||
111 | * Returns the number of arguments left in argv[]. | ||
112 | */ | ||
113 | extern int parse_options(int argc, const char **argv, | ||
114 | const struct option *options, | ||
115 | const char * const usagestr[], int flags); | ||
116 | |||
117 | extern NORETURN void usage_with_options(const char * const *usagestr, | ||
118 | const struct option *options); | ||
119 | |||
120 | /*----- incremental advanced APIs -----*/ | ||
121 | |||
/*
 * Status codes of the incremental API (parse_options_step() and the usage
 * printers): HELP is -1, DONE is 0, UNKNOWN is 1.
 */
122 | enum { | ||
123 | PARSE_OPT_HELP = -1, | ||
124 | PARSE_OPT_DONE, | ||
125 | PARSE_OPT_UNKNOWN, | ||
126 | }; | ||
127 | |||
128 | /* | ||
129 | * It's okay for the caller to consume argv/argc in the usual way. | ||
130 | * Other fields of that structure are private to parse-options and should not | ||
131 | * be modified in any way. | ||
132 | */ | ||
/*
 * Parsing state:
 *   argv, argc - arguments not yet consumed
 *   out, cpidx - output array (the caller's argv) and the next free slot
 *                in it; kept arguments are compacted there
 *   opt        - position inside the short-option cluster being parsed,
 *                NULL when not inside one
 *   flags      - mask of enum parse_opt_flags given at start
 */
133 | struct parse_opt_ctx_t { | ||
134 | const char **argv; | ||
135 | const char **out; | ||
136 | int argc, cpidx; | ||
137 | const char *opt; | ||
138 | int flags; | ||
139 | }; | ||
140 | |||
141 | extern int parse_options_usage(const char * const *usagestr, | ||
142 | const struct option *opts); | ||
143 | |||
144 | extern void parse_options_start(struct parse_opt_ctx_t *ctx, | ||
145 | int argc, const char **argv, int flags); | ||
146 | |||
147 | extern int parse_options_step(struct parse_opt_ctx_t *ctx, | ||
148 | const struct option *options, | ||
149 | const char * const usagestr[]); | ||
150 | |||
151 | extern int parse_options_end(struct parse_opt_ctx_t *ctx); | ||
152 | |||
153 | |||
154 | /*----- some often used options -----*/ | ||
155 | extern int parse_opt_abbrev_cb(const struct option *, const char *, int); | ||
156 | extern int parse_opt_approxidate_cb(const struct option *, const char *, int); | ||
157 | extern int parse_opt_verbosity_cb(const struct option *, const char *, int); | ||
158 | |||
/*
 * Ready-made table entries for common options.  Note that OPT__VERBOSITY
 * expands to TWO entries (-v/--verbose and -q/--quiet) that share one
 * variable and the parse_opt_verbosity_cb callback.
 */
159 | #define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") | ||
160 | #define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") | ||
161 | #define OPT__VERBOSITY(var) \ | ||
162 | { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ | ||
163 | PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ | ||
164 | { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ | ||
165 | PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } | ||
166 | #define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") | ||
167 | #define OPT__ABBREV(var) \ | ||
168 | { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ | ||
169 | "use <n> digits to display SHA-1s", \ | ||
170 | PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } | ||
171 | |||
172 | extern const char *parse_options_fix_filename(const char *prefix, const char *file); | ||
173 | |||
174 | #endif | ||
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c new file mode 100644 index 000000000000..a501a40dd2cb --- /dev/null +++ b/tools/perf/util/path.c | |||
@@ -0,0 +1,353 @@ | |||
1 | /* | ||
2 | * I'm tired of doing "vsnprintf()" etc just to open a | ||
3 | * file, so here's a "return static buffer with printf" | ||
4 | * interface for paths. | ||
5 | * | ||
6 | * It's obviously not thread-safe. Sue me. But it's quite | ||
7 | * useful for doing things like | ||
8 | * | ||
9 | * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); | ||
10 | * | ||
11 | * which is what it's designed for. | ||
12 | */ | ||
13 | #include "cache.h" | ||
14 | |||
15 | static char bad_path[] = "/bad-path/"; | ||
16 | /* | ||
17 | * Two hacks: | ||
18 | */ | ||
19 | |||
/* Base directory that perf paths are built under: currently always ".". */
static char *get_perf_dir(void)
{
	static char here[] = ".";

	return here;
}
24 | |||
/*
 * Copy `src` into `dest`, writing at most `size` bytes including the
 * terminating NUL.  When `size` is 0 nothing is written.
 * Returns strlen(src), so a result >= size means the copy was truncated.
 */
size_t strlcpy(char *dest, const char *src, size_t size)
{
	const size_t srclen = strlen(src);
	size_t ncopy;

	if (size == 0)
		return srclen;

	ncopy = (srclen < size) ? srclen : size - 1;
	memcpy(dest, src, ncopy);
	dest[ncopy] = '\0';
	return srclen;
}
36 | |||
37 | |||
/*
 * Hand out one of four static PATH_MAX-sized buffers in rotation, so up
 * to four returned paths can be in use at once before one is reused.
 */
static char *get_pathname(void)
{
	static char ring[4][PATH_MAX];
	static int cursor;

	cursor++;
	return ring[cursor & 3];
}
44 | |||
/*
 * Skip a leading "./" (and any further '/' right after it) and return a
 * pointer into `path` at the first character that remains.  Paths that do
 * not start with "./" are returned unchanged.
 *
 * The prefix is tested one character at a time so that a one-byte string
 * ("") is never read past its terminator, which a fixed two-byte memcmp()
 * would do.
 */
static char *cleanup_path(char *path)
{
	if (path[0] == '.' && path[1] == '/') {
		path += 2;
		while (*path == '/')
			path++;
	}
	return path;
}
55 | |||
56 | char *mksnpath(char *buf, size_t n, const char *fmt, ...) | ||
57 | { | ||
58 | va_list args; | ||
59 | unsigned len; | ||
60 | |||
61 | va_start(args, fmt); | ||
62 | len = vsnprintf(buf, n, fmt, args); | ||
63 | va_end(args); | ||
64 | if (len >= n) { | ||
65 | strlcpy(buf, bad_path, n); | ||
66 | return buf; | ||
67 | } | ||
68 | return cleanup_path(buf); | ||
69 | } | ||
70 | |||
71 | static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) | ||
72 | { | ||
73 | const char *perf_dir = get_perf_dir(); | ||
74 | size_t len; | ||
75 | |||
76 | len = strlen(perf_dir); | ||
77 | if (n < len + 1) | ||
78 | goto bad; | ||
79 | memcpy(buf, perf_dir, len); | ||
80 | if (len && !is_dir_sep(perf_dir[len-1])) | ||
81 | buf[len++] = '/'; | ||
82 | len += vsnprintf(buf + len, n - len, fmt, args); | ||
83 | if (len >= n) | ||
84 | goto bad; | ||
85 | return cleanup_path(buf); | ||
86 | bad: | ||
87 | strlcpy(buf, bad_path, n); | ||
88 | return buf; | ||
89 | } | ||
90 | |||
/*
 * printf-style front end to perf_vsnpath() writing into the caller's
 * buffer.  Always returns `buf` itself, not the cleaned-up pointer that
 * perf_vsnpath() computes.
 */
char *perf_snpath(char *buf, size_t n, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void)perf_vsnpath(buf, n, fmt, ap);
	va_end(ap);

	return buf;
}
99 | |||
100 | char *perf_pathdup(const char *fmt, ...) | ||
101 | { | ||
102 | char path[PATH_MAX]; | ||
103 | va_list args; | ||
104 | va_start(args, fmt); | ||
105 | (void)perf_vsnpath(path, sizeof(path), fmt, args); | ||
106 | va_end(args); | ||
107 | return xstrdup(path); | ||
108 | } | ||
109 | |||
110 | char *mkpath(const char *fmt, ...) | ||
111 | { | ||
112 | va_list args; | ||
113 | unsigned len; | ||
114 | char *pathname = get_pathname(); | ||
115 | |||
116 | va_start(args, fmt); | ||
117 | len = vsnprintf(pathname, PATH_MAX, fmt, args); | ||
118 | va_end(args); | ||
119 | if (len >= PATH_MAX) | ||
120 | return bad_path; | ||
121 | return cleanup_path(pathname); | ||
122 | } | ||
123 | |||
124 | char *perf_path(const char *fmt, ...) | ||
125 | { | ||
126 | const char *perf_dir = get_perf_dir(); | ||
127 | char *pathname = get_pathname(); | ||
128 | va_list args; | ||
129 | unsigned len; | ||
130 | |||
131 | len = strlen(perf_dir); | ||
132 | if (len > PATH_MAX-100) | ||
133 | return bad_path; | ||
134 | memcpy(pathname, perf_dir, len); | ||
135 | if (len && perf_dir[len-1] != '/') | ||
136 | pathname[len++] = '/'; | ||
137 | va_start(args, fmt); | ||
138 | len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); | ||
139 | va_end(args); | ||
140 | if (len >= PATH_MAX) | ||
141 | return bad_path; | ||
142 | return cleanup_path(pathname); | ||
143 | } | ||
144 | |||
145 | |||
/*
 * perf_mkstemp() - create tmp file honoring TMPDIR variable
 *
 * Builds "<TMPDIR or /tmp>/<template>" in `path` (capacity `len`) and
 * passes it to mkstemp().  Returns mkstemp()'s result, or -1 with errno
 * set to ENAMETOOLONG when the name does not fit.
 */
int perf_mkstemp(char *path, size_t len, const char *template)
{
	const char *dir = getenv("TMPDIR");
	size_t needed;

	if (dir == NULL)
		dir = "/tmp";

	needed = snprintf(path, len, "%s/%s", dir, template);
	if (needed < len)
		return mkstemp(path);

	errno = ENAMETOOLONG;
	return -1;
}
162 | |||
163 | |||
/*
 * If `abs` lies below directory `base`, return the part of `abs` after
 * `base` (and after the '/' separating them), copied into a static
 * buffer.  In every other case `abs` itself is returned: `base` is NULL,
 * `base` is not a prefix of `abs`, the prefix does not end on a path
 * component boundary, or the remainder would not fit in the buffer.
 *
 * The returned static buffer is overwritten by the next call.
 */
const char *make_relative_path(const char *abs, const char *base)
{
	static char buf[PATH_MAX + 1];
	size_t baselen, rellen;

	if (!base)
		return abs;
	baselen = strlen(base);
	if (strncmp(abs, base, baselen))
		return abs;
	if (abs[baselen] == '/')
		baselen++;
	/* baselen == 0: an empty base has no last character to inspect */
	else if (baselen == 0 || base[baselen - 1] != '/')
		return abs;

	/* never overflow the static buffer; fall back to the absolute path */
	rellen = strlen(abs + baselen);
	if (rellen >= sizeof(buf))
		return abs;
	memcpy(buf, abs + baselen, rellen + 1);
	return buf;
}
180 | |||
181 | /* | ||
182 | * It is okay if dst == src, but they should not overlap otherwise. | ||
183 | * | ||
184 | * Performs the following normalizations on src, storing the result in dst: | ||
185 | * - Ensures that components are separated by '/' (Windows only) | ||
186 | * - Squashes sequences of '/'. | ||
187 | * - Removes "." components. | ||
188 | * - Removes ".." components, and the components that precede them. | ||
189 | * Returns failure (non-zero) if a ".." component appears as first path | ||
190 | * component anytime during the normalization. Otherwise, returns success (0). | ||
191 | * | ||
192 | * Note that this function is purely textual. It does not follow symlinks, | ||
193 | * verify the existence of the path, or make any system calls. | ||
194 | */ | ||
/*
 * Textually normalize `src` into `dst`; returns 0 on success and -1 when a
 * ".." component would climb above the start of the path.
 * `dst0` marks the first output byte after an optional DOS drive prefix;
 * the "up_one" step never backs up past it.
 */
195 | int normalize_path_copy(char *dst, const char *src) | ||
196 | { | ||
197 | char *dst0; | ||
198 | |||
/* a DOS drive prefix (two characters) is copied through untouched */
199 | if (has_dos_drive_prefix(src)) { | ||
200 | *dst++ = *src++; | ||
201 | *dst++ = *src++; | ||
202 | } | ||
203 | dst0 = dst; | ||
204 | |||
/* a run of leading separators collapses into a single '/' */
205 | if (is_dir_sep(*src)) { | ||
206 | *dst++ = '/'; | ||
207 | while (is_dir_sep(*src)) | ||
208 | src++; | ||
209 | } | ||
210 | |||
/* each iteration handles one path component starting at src */
211 | for (;;) { | ||
212 | char c = *src; | ||
213 | |||
214 | /* | ||
215 | * A path component that begins with . could be | ||
216 | * special: | ||
217 | * (1) "." and ends -- ignore and terminate. | ||
218 | * (2) "./" -- ignore them, eat slash and continue. | ||
219 | * (3) ".." and ends -- strip one and terminate. | ||
220 | * (4) "../" -- strip one, eat slash and continue. | ||
221 | */ | ||
222 | if (c == '.') { | ||
223 | if (!src[1]) { | ||
224 | /* (1) */ | ||
225 | src++; | ||
226 | } else if (is_dir_sep(src[1])) { | ||
227 | /* (2) */ | ||
228 | src += 2; | ||
229 | while (is_dir_sep(*src)) | ||
230 | src++; | ||
231 | continue; | ||
232 | } else if (src[1] == '.') { | ||
233 | if (!src[2]) { | ||
234 | /* (3) */ | ||
235 | src += 2; | ||
236 | goto up_one; | ||
237 | } else if (is_dir_sep(src[2])) { | ||
238 | /* (4) */ | ||
239 | src += 3; | ||
240 | while (is_dir_sep(*src)) | ||
241 | src++; | ||
242 | goto up_one; | ||
243 | } | ||
244 | } | ||
245 | } | ||
246 | |||
247 | /* copy up to the next '/', and eat all '/' */ | ||
248 | while ((c = *src++) != '\0' && !is_dir_sep(c)) | ||
249 | *dst++ = c; | ||
250 | if (is_dir_sep(c)) { | ||
251 | *dst++ = '/'; | ||
252 | while (is_dir_sep(c)) | ||
253 | c = *src++; | ||
/* the loop above read one character too far; step back onto it */
254 | src--; | ||
255 | } else if (!c) | ||
256 | break; | ||
257 | continue; | ||
258 | |||
259 | up_one: | ||
260 | /* | ||
261 | * dst0..dst is prefix portion, and dst[-1] is '/'; | ||
262 | * go up one level. | ||
263 | */ | ||
264 | dst--; /* go to trailing '/' */ | ||
265 | if (dst <= dst0) | ||
266 | return -1; | ||
267 | /* Windows: dst[-1] cannot be backslash anymore */ | ||
268 | while (dst0 < dst && dst[-1] != '/') | ||
269 | dst--; | ||
270 | } | ||
271 | *dst = '\0'; | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * path = Canonical absolute path | ||
277 | * prefix_list = Colon-separated list of absolute paths | ||
278 | * | ||
279 | * Determines, for each path in prefix_list, whether the "prefix" really | ||
280 | * is an ancestor directory of path. Returns the length of the longest | ||
281 | * ancestor directory, excluding any trailing slashes, or -1 if no prefix | ||
282 | * is an ancestor. (Note that this means 0 is returned if prefix_list is | ||
283 | * "/".) "/foo" is not considered an ancestor of "/foobar". Directories | ||
284 | * are not considered to be their own ancestors. path must be in a | ||
285 | * canonical form: empty components, or "." or ".." components are not | ||
286 | * allowed. prefix_list may be null, which is like "". | ||
287 | */ | ||
288 | int longest_ancestor_length(const char *path, const char *prefix_list) | ||
289 | { | ||
290 | char buf[PATH_MAX+1]; | ||
291 | const char *ceil, *colon; | ||
292 | int len, max_len = -1; | ||
293 | |||
294 | if (prefix_list == NULL || !strcmp(path, "/")) | ||
295 | return -1; | ||
296 | |||
297 | for (colon = ceil = prefix_list; *colon; ceil = colon+1) { | ||
298 | for (colon = ceil; *colon && *colon != PATH_SEP; colon++); | ||
299 | len = colon - ceil; | ||
300 | if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) | ||
301 | continue; | ||
302 | strlcpy(buf, ceil, len+1); | ||
303 | if (normalize_path_copy(buf, buf) < 0) | ||
304 | continue; | ||
305 | len = strlen(buf); | ||
306 | if (len > 0 && buf[len-1] == '/') | ||
307 | buf[--len] = '\0'; | ||
308 | |||
309 | if (!strncmp(path, buf, len) && | ||
310 | path[len] == '/' && | ||
311 | len > max_len) { | ||
312 | max_len = len; | ||
313 | } | ||
314 | } | ||
315 | |||
316 | return max_len; | ||
317 | } | ||
318 | |||
319 | /* strip arbitrary amount of directory separators at end of path */ | ||
/*
 * Given the first `len` characters of `path`, return the length that is
 * left once all directory separators at the end have been dropped.
 */
static inline int chomp_trailing_dir_sep(const char *path, int len)
{
	int end = len;

	while (end != 0 && is_dir_sep(path[end - 1]))
		--end;
	return end;
}
326 | |||
327 | /* | ||
328 | * If path ends with suffix (complete path components), returns the | ||
329 | * part before suffix (sans trailing directory separators). | ||
330 | * Otherwise returns NULL. | ||
331 | */ | ||
char *strip_path_suffix(const char *path, const char *suffix)
{
	int plen = strlen(path);
	int slen = strlen(suffix);

	/* compare both strings backwards, one character or separator run at a time */
	while (slen != 0) {
		if (plen == 0)
			return NULL;

		if (!is_dir_sep(path[plen - 1])) {
			plen--;
			slen--;
			if (path[plen] != suffix[slen])
				return NULL;
			continue;
		}

		/* a separator run in path must meet one in suffix */
		if (!is_dir_sep(suffix[slen - 1]))
			return NULL;
		plen = chomp_trailing_dir_sep(path, plen);
		slen = chomp_trailing_dir_sep(suffix, slen);
	}

	/* the suffix must start on a path component boundary */
	if (plen && !is_dir_sep(path[plen - 1]))
		return NULL;
	return xstrndup(path, chomp_trailing_dir_sep(path, plen));
}
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c new file mode 100644 index 000000000000..f18c5212bc92 --- /dev/null +++ b/tools/perf/util/quote.c | |||
@@ -0,0 +1,481 @@ | |||
1 | #include "cache.h" | ||
2 | #include "quote.h" | ||
3 | |||
4 | int quote_path_fully = 1; | ||
5 | |||
6 | /* Help to copy the thing properly quoted for the shell safety. | ||
7 | * any single quote is replaced with '\'', any exclamation point | ||
8 | * is replaced with '\!', and the whole thing is enclosed in a | ||
9 | * single quote pair. | ||
10 | * E.g. | ||
11 | * original sq_quote result | ||
12 | * name ==> name ==> 'name' | ||
13 | * a b ==> a b ==> 'a b' | ||
14 | * a'b ==> a'\''b ==> 'a'\''b' | ||
15 | * a!b ==> a'\!'b ==> 'a'\!'b' | ||
16 | */ | ||
/* Non-zero for the two characters that need a backslash outside the quotes. */
static inline int need_bs_quote(char c)
{
	switch (c) {
	case '\'':
	case '!':
		return 1;
	default:
		return 0;
	}
}
21 | |||
22 | void sq_quote_buf(struct strbuf *dst, const char *src) | ||
23 | { | ||
24 | char *to_free = NULL; | ||
25 | |||
26 | if (dst->buf == src) | ||
27 | to_free = strbuf_detach(dst, NULL); | ||
28 | |||
29 | strbuf_addch(dst, '\''); | ||
30 | while (*src) { | ||
31 | size_t len = strcspn(src, "'!"); | ||
32 | strbuf_add(dst, src, len); | ||
33 | src += len; | ||
34 | while (need_bs_quote(*src)) { | ||
35 | strbuf_addstr(dst, "'\\"); | ||
36 | strbuf_addch(dst, *src++); | ||
37 | strbuf_addch(dst, '\''); | ||
38 | } | ||
39 | } | ||
40 | strbuf_addch(dst, '\''); | ||
41 | free(to_free); | ||
42 | } | ||
43 | |||
/* Write the single-quoted form of `src` to `stream`. */
void sq_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p; p++) {
		if (!need_bs_quote(*p)) {
			fputc(*p, stream);
			continue;
		}
		/* close the quote, emit \c, reopen the quote */
		fputs("'\\", stream);
		fputc(*p, stream);
		fputc('\'', stream);
	}
	fputc('\'', stream);
}
60 | |||
61 | void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) | ||
62 | { | ||
63 | int i; | ||
64 | |||
65 | /* Copy into destination buffer. */ | ||
66 | strbuf_grow(dst, 255); | ||
67 | for (i = 0; argv[i]; ++i) { | ||
68 | strbuf_addch(dst, ' '); | ||
69 | sq_quote_buf(dst, argv[i]); | ||
70 | if (maxlen && dst->len > maxlen) | ||
71 | die("Too many or long arguments"); | ||
72 | } | ||
73 | } | ||
74 | |||
/*
 * Undo single-quoting of the first word in `arg`, in place.
 *
 * The dequoted text is written over the start of `arg` and NUL-terminated;
 * `arg` is returned on success.  Returns NULL if `arg` does not start with
 * a single quote, if the string ends inside a quoted section, or if a
 * closing quote is followed by anything other than: the end of the string,
 * a backslash-escaped ' or ! immediately followed by a reopening quote, or
 * (only when `next` is non-NULL) whitespace.
 *
 * When `next` is non-NULL it is set to NULL if the whole string was
 * consumed, or to the first non-space character after the word.
 */
75 | char *sq_dequote_step(char *arg, char **next) | ||
76 | { | ||
77 | char *dst = arg; | ||
78 | char *src = arg; | ||
79 | char c; | ||
80 | |||
81 | if (*src != '\'') | ||
82 | return NULL; | ||
83 | for (;;) { | ||
84 | c = *++src; | ||
85 | if (!c) | ||
86 | return NULL; | ||
87 | if (c != '\'') { | ||
88 | *dst++ = c; | ||
89 | continue; | ||
90 | } | ||
91 | /* We stepped out of sq */ | ||
92 | switch (*++src) { | ||
93 | case '\0': | ||
94 | *dst = 0; | ||
95 | if (next) | ||
96 | *next = NULL; | ||
97 | return arg; | ||
98 | case '\\': | ||
99 | c = *++src; | ||
/* an escaped ' or ! must be followed by the quote that reopens the string */
100 | if (need_bs_quote(c) && *++src == '\'') { | ||
101 | *dst++ = c; | ||
102 | continue; | ||
103 | } | ||
104 | /* Fallthrough */ | ||
105 | default: | ||
106 | if (!next || !isspace(*src)) | ||
107 | return NULL; | ||
108 | do { | ||
109 | c = *++src; | ||
110 | } while (isspace(c)); | ||
111 | *dst = 0; | ||
112 | *next = src; | ||
113 | return arg; | ||
114 | } | ||
115 | } | ||
116 | } | ||
117 | |||
/*
 * Dequote a string that consists of exactly one single-quoted word, in
 * place.  Returns `arg` on success and NULL if it is not well formed or
 * has anything after the word.
 */
char *sq_dequote(char *arg)
{
	char **no_next = NULL;

	return sq_dequote_step(arg, no_next);
}
122 | |||
/*
 * Split `arg` into single-quoted words, dequote each one in place and
 * append the resulting pointers to the growable array (*argv, *nr, *alloc).
 * Returns 0 on success (including for an empty string) and -1 as soon as
 * a word fails to dequote; words appended before the failure stay in the
 * array.
 */
int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc)
{
	char *cursor = arg;

	if (*arg == '\0')
		return 0;

	while (cursor) {
		char *word = sq_dequote_step(cursor, &cursor);

		if (!word)
			return -1;
		ALLOC_GROW(*argv, *nr + 1, *alloc);
		(*argv)[(*nr)++] = word;
	}

	return 0;
}
139 | |||
/*
 * Per-byte quoting table for C-style name quoting, indexed by the byte
 * value as an unsigned char.  X8()/X16() repeat one initializer 8 or 16
 * times.  Bytes 0x80..0xff have no initializer and are therefore 0.
 */
140 | /* 1 means: quote as octal | ||
141 | * 0 means: quote as octal if (quote_path_fully) | ||
142 | * -1 means: never quote | ||
143 | * c: quote as "\\c" | ||
144 | */ | ||
145 | #define X8(x) x, x, x, x, x, x, x, x | ||
146 | #define X16(x) X8(x), X8(x) | ||
147 | static signed char const sq_lookup[256] = { | ||
148 | /* 0 1 2 3 4 5 6 7 */ | ||
149 | /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', | ||
150 | /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, | ||
151 | /* 0x10 */ X16(1), | ||
152 | /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, | ||
153 | /* 0x28 */ X16(-1), X16(-1), X16(-1), | ||
154 | /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, | ||
155 | /* 0x60 */ X16(-1), X8(-1), | ||
156 | /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, | ||
157 | /* 0x80 */ /* set to 0 */ | ||
158 | }; | ||
159 | |||
160 | static inline int sq_must_quote(char c) | ||
161 | { | ||
162 | return sq_lookup[(unsigned char)c] + quote_path_fully > 0; | ||
163 | } | ||
164 | |||
/*
 * Length of the longest prefix of `s` that needs no quoting.  A negative
 * `maxlen` means "scan until a byte that must be quoted"; otherwise at
 * most `maxlen` bytes are examined.  The unbounded scan ends at a NUL
 * because NUL is marked as a byte needing an escape.
 */
static size_t next_quote_pos(const char *s, ssize_t maxlen)
{
	size_t pos = 0;

	if (maxlen < 0) {
		while (!sq_must_quote(s[pos]))
			pos++;
		return pos;
	}

	while (pos < maxlen && !sq_must_quote(s[pos]))
		pos++;
	return pos;
}
178 | |||
179 | /* | ||
180 | * C-style name quoting. | ||
181 | * | ||
182 | * (1) if sb and fp are both NULL, inspect the input name and counts the | ||
183 | * number of bytes that are needed to hold c_style quoted version of name, | ||
184 | * counting the double quotes around it but not terminating NUL, and | ||
185 | * returns it. | ||
186 | * However, if name does not need c_style quoting, it returns 0. | ||
187 | * | ||
188 | * (2) if sb or fp are not NULL, it emits the c_style quoted version | ||
189 | * of name, enclosed with double quotes if asked and needed only. | ||
190 | * Return value is the same as in (1). | ||
191 | */ | ||
192 | static size_t quote_c_style_counted(const char *name, ssize_t maxlen, | ||
193 | struct strbuf *sb, FILE *fp, int no_dq) | ||
194 | { | ||
195 | #undef EMIT | ||
196 | #define EMIT(c) \ | ||
197 | do { \ | ||
198 | if (sb) strbuf_addch(sb, (c)); \ | ||
199 | if (fp) fputc((c), fp); \ | ||
200 | count++; \ | ||
201 | } while (0) | ||
202 | #define EMITBUF(s, l) \ | ||
203 | do { \ | ||
204 | int __ret; \ | ||
205 | if (sb) strbuf_add(sb, (s), (l)); \ | ||
206 | if (fp) __ret = fwrite((s), (l), 1, fp); \ | ||
207 | count += (l); \ | ||
208 | } while (0) | ||
209 | |||
210 | size_t len, count = 0; | ||
211 | const char *p = name; | ||
212 | |||
213 | for (;;) { | ||
214 | int ch; | ||
215 | |||
216 | len = next_quote_pos(p, maxlen); | ||
217 | if (len == maxlen || !p[len]) | ||
218 | break; | ||
219 | |||
220 | if (!no_dq && p == name) | ||
221 | EMIT('"'); | ||
222 | |||
223 | EMITBUF(p, len); | ||
224 | EMIT('\\'); | ||
225 | p += len; | ||
226 | ch = (unsigned char)*p++; | ||
227 | if (sq_lookup[ch] >= ' ') { | ||
228 | EMIT(sq_lookup[ch]); | ||
229 | } else { | ||
230 | EMIT(((ch >> 6) & 03) + '0'); | ||
231 | EMIT(((ch >> 3) & 07) + '0'); | ||
232 | EMIT(((ch >> 0) & 07) + '0'); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | EMITBUF(p, len); | ||
237 | if (p == name) /* no ending quote needed */ | ||
238 | return 0; | ||
239 | |||
240 | if (!no_dq) | ||
241 | EMIT('"'); | ||
242 | return count; | ||
243 | } | ||
244 | |||
/*
 * Convenience front end for NUL-terminated names: forwards to
 * quote_c_style_counted() with a negative (i.e. unbounded) length.
 */
size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq)
{
	const ssize_t until_nul = -1;

	return quote_c_style_counted(name, until_nul, sb, fp, nodq);
}
249 | |||
/*
 * Append prefix immediately followed by path to sb.  If either part needs
 * C-style quoting, both are emitted in escaped form and, unless nodq is
 * set, the pair is wrapped in a single set of double quotes; otherwise
 * both are appended verbatim.
 */
void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq)
{
	/* dry run with no output sink: non-zero means quoting is required */
	int needs_quoting = quote_c_style(prefix, NULL, NULL, 0) ||
			    quote_c_style(path, NULL, NULL, 0);

	if (!needs_quoting) {
		strbuf_addstr(sb, prefix);
		strbuf_addstr(sb, path);
		return;
	}

	if (!nodq)
		strbuf_addch(sb, '"');
	/* the outer quotes are handled here, so suppress the inner ones */
	quote_c_style(prefix, sb, NULL, 1);
	quote_c_style(path, sb, NULL, 1);
	if (!nodq)
		strbuf_addch(sb, '"');
}
265 | |||
/*
 * Write name to fp followed by the terminator byte.  With a zero
 * terminator the name is written verbatim; with any other terminator it
 * goes through quote_c_style().
 */
void write_name_quoted(const char *name, FILE *fp, int terminator)
{
	if (!terminator)
		fputs(name, fp);
	else
		quote_c_style(name, NULL, fp, 0);

	fputc(terminator, fp);
}
275 | |||
/*
 * Write the first pfxlen bytes of pfx followed by name to fp, then the
 * terminator byte.  With a non-zero terminator, the two pieces are
 * C-style quoted inside one shared pair of double quotes if either of
 * them contains a byte that needs quoting; otherwise (and always with a
 * zero terminator) they are written verbatim.
 */
extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
				 const char *name, FILE *fp, int terminator)
{
	int must_quote = 0;

	if (terminator) {
		if (next_quote_pos(pfx, pfxlen) < pfxlen)
			must_quote = 1;
		else if (name[next_quote_pos(name, -1)])
			must_quote = 1;
	}

	if (!must_quote) {
		int written;

		/* result captured but not acted upon */
		written = fwrite(pfx, pfxlen, 1, fp);
		fputs(name, fp);
	} else {
		fputc('"', fp);
		quote_c_style_counted(pfx, pfxlen, NULL, fp, 1);
		quote_c_style(name, NULL, fp, 1);
		fputc('"', fp);
	}

	fputc(terminator, fp);
}
298 | |||
299 | /* quote path as relative to the given prefix */ | ||
300 | char *quote_path_relative(const char *in, int len, | ||
301 | struct strbuf *out, const char *prefix) | ||
302 | { | ||
303 | int needquote; | ||
304 | |||
305 | if (len < 0) | ||
306 | len = strlen(in); | ||
307 | |||
308 | /* "../" prefix itself does not need quoting, but "in" might. */ | ||
309 | needquote = next_quote_pos(in, len) < len; | ||
310 | strbuf_setlen(out, 0); | ||
311 | strbuf_grow(out, len); | ||
312 | |||
313 | if (needquote) | ||
314 | strbuf_addch(out, '"'); | ||
315 | if (prefix) { | ||
316 | int off = 0; | ||
317 | while (prefix[off] && off < len && prefix[off] == in[off]) | ||
318 | if (prefix[off] == '/') { | ||
319 | prefix += off + 1; | ||
320 | in += off + 1; | ||
321 | len -= off + 1; | ||
322 | off = 0; | ||
323 | } else | ||
324 | off++; | ||
325 | |||
326 | for (; *prefix; prefix++) | ||
327 | if (*prefix == '/') | ||
328 | strbuf_addstr(out, "../"); | ||
329 | } | ||
330 | |||
331 | quote_c_style_counted (in, len, out, NULL, 1); | ||
332 | |||
333 | if (needquote) | ||
334 | strbuf_addch(out, '"'); | ||
335 | if (!out->len) | ||
336 | strbuf_addstr(out, "./"); | ||
337 | |||
338 | return out->buf; | ||
339 | } | ||
340 | |||
341 | /* | ||
342 | * C-style name unquoting. | ||
343 | * | ||
344 | * Quoted should point at the opening double quote. | ||
345 | * + Returns 0 if it was able to unquote the string properly, and appends the | ||
346 | * result in the strbuf `sb'. | ||
347 | * + Returns -1 in case of error, and doesn't touch the strbuf. Though note | ||
348 | * that this function will allocate memory in the strbuf, so calling | ||
349 | * strbuf_release is mandatory whichever result unquote_c_style returns. | ||
350 | * | ||
351 | * Updates endp pointer to point at one past the ending double quote if given. | ||
352 | */ | ||
353 | int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) | ||
354 | { | ||
355 | size_t oldlen = sb->len, len; | ||
356 | int ch, ac; | ||
357 | |||
358 | if (*quoted++ != '"') | ||
359 | return -1; | ||
360 | |||
361 | for (;;) { | ||
362 | len = strcspn(quoted, "\"\\"); | ||
363 | strbuf_add(sb, quoted, len); | ||
364 | quoted += len; | ||
365 | |||
366 | switch (*quoted++) { | ||
367 | case '"': | ||
368 | if (endp) | ||
369 | *endp = quoted; | ||
370 | return 0; | ||
371 | case '\\': | ||
372 | break; | ||
373 | default: | ||
374 | goto error; | ||
375 | } | ||
376 | |||
377 | switch ((ch = *quoted++)) { | ||
378 | case 'a': ch = '\a'; break; | ||
379 | case 'b': ch = '\b'; break; | ||
380 | case 'f': ch = '\f'; break; | ||
381 | case 'n': ch = '\n'; break; | ||
382 | case 'r': ch = '\r'; break; | ||
383 | case 't': ch = '\t'; break; | ||
384 | case 'v': ch = '\v'; break; | ||
385 | |||
386 | case '\\': case '"': | ||
387 | break; /* verbatim */ | ||
388 | |||
389 | /* octal values with first digit over 4 overflow */ | ||
390 | case '0': case '1': case '2': case '3': | ||
391 | ac = ((ch - '0') << 6); | ||
392 | if ((ch = *quoted++) < '0' || '7' < ch) | ||
393 | goto error; | ||
394 | ac |= ((ch - '0') << 3); | ||
395 | if ((ch = *quoted++) < '0' || '7' < ch) | ||
396 | goto error; | ||
397 | ac |= (ch - '0'); | ||
398 | ch = ac; | ||
399 | break; | ||
400 | default: | ||
401 | goto error; | ||
402 | } | ||
403 | strbuf_addch(sb, ch); | ||
404 | } | ||
405 | |||
406 | error: | ||
407 | strbuf_setlen(sb, oldlen); | ||
408 | return -1; | ||
409 | } | ||
410 | |||
411 | /* quoting as a string literal for other languages */ | ||
412 | |||
/*
 * Print src to stream as a single-quoted string literal: the text is
 * wrapped in single quotes and every single quote or backslash inside it
 * is preceded by a backslash.
 */
void perl_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p; p++) {
		if (*p == '\'' || *p == '\\')
			fputc('\\', stream);
		fputc(*p, stream);
	}
	fputc('\'', stream);
}
427 | |||
/*
 * Print src to stream as a single-quoted string literal: newlines become
 * the two characters "\n", and single quotes and backslashes are
 * preceded by a backslash.
 */
void python_quote_print(FILE *stream, const char *src)
{
	const char *p;

	fputc('\'', stream);
	for (p = src; *p; p++) {
		switch (*p) {
		case '\n':
			fputc('\\', stream);
			fputc('n', stream);
			break;
		case '\'':
		case '\\':
			fputc('\\', stream);
			fputc(*p, stream);
			break;
		default:
			fputc(*p, stream);
			break;
		}
	}
	fputc('\'', stream);
}
448 | |||
/*
 * Print src to stream as a double-quoted string literal.  Form feed,
 * carriage return, newline, tab and vertical tab are written as their
 * two-character backslash escapes; brackets, braces, '$', backslash and
 * the double quote are preceded by a backslash; all other bytes are
 * copied unchanged.
 */
void tcl_quote_print(FILE *stream, const char *src)
{
	char c;

	fputc('"', stream);
	while ((c = *src++)) {
		switch (c) {
		/* control characters with a named escape */
		case '\f':
			fputs("\\f", stream);
			continue;
		case '\r':
			fputs("\\r", stream);
			continue;
		case '\n':
			fputs("\\n", stream);
			continue;
		case '\t':
			fputs("\\t", stream);
			continue;
		case '\v':
			fputs("\\v", stream);
			continue;

		/* metacharacters: backslash first, then the byte itself below */
		case '[':
		case ']':
		case '{':
		case '}':
		case '$':
		case '\\':
		case '"':
			fputc('\\', stream);
			break;

		default:
			break;
		}
		fputc(c, stream);
	}
	fputc('"', stream);
}
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h new file mode 100644 index 000000000000..5dfad89816db --- /dev/null +++ b/tools/perf/util/quote.h | |||
@@ -0,0 +1,68 @@ | |||
#ifndef QUOTE_H
#define QUOTE_H

#include <stddef.h>
#include <stdio.h>

/*
 * Only pointers to struct strbuf are used below.  Declaring the tag here
 * gives every prototype the same file-scope type, so this header works
 * regardless of whether the strbuf definition was included before it.
 */
struct strbuf;

/* Help to copy the thing properly quoted for the shell safety.
 * any single quote is replaced with '\'', any exclamation point
 * is replaced with '\!', and the whole thing is enclosed in a
 * single quote pair.
 *
 * For example, if you are passing the result to system() as an
 * argument:
 *
 * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1))
 *
 * would be appropriate.  If the system() is going to call ssh to
 * run the command on the other side:
 *
 * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1));
 * sprintf(rcmd, "ssh %s %s", sq_quote(host), sq_quote(cmd));
 *
 * Note that the above examples leak memory!  Remember to free result from
 * sq_quote() in a real application.
 *
 * sq_quote_buf() writes to an existing buffer of specified size; it
 * will return the number of characters that would have been written
 * excluding the final null regardless of the buffer size.
 *
 * NOTE(review): the sq_quote_buf() paragraph above does not match the
 * prototype below, which takes a struct strbuf and returns void, and
 * sq_quote() itself is not declared in this header -- confirm against
 * the implementation and update this comment.
 */

extern void sq_quote_print(FILE *stream, const char *src);

extern void sq_quote_buf(struct strbuf *, const char *src);
extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);

/* This unwraps what sq_quote() produces in place, but returns
 * NULL if the input does not look like what sq_quote would have
 * produced.
 */
extern char *sq_dequote(char *);

/*
 * Same as the above, but can be used to unwrap many arguments in the
 * same string separated by space. "next" is changed to point to the
 * next argument that should be passed as first parameter. When there
 * is no more argument to be dequoted, "next" is updated to point to NULL.
 */
extern char *sq_dequote_step(char *arg, char **next);
extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc);

extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp);
extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq);
extern void quote_two_c_style(struct strbuf *, const char *, const char *, int);

extern void write_name_quoted(const char *name, FILE *, int terminator);
extern void write_name_quotedpfx(const char *pfx, size_t pfxlen,
				 const char *name, FILE *, int terminator);

/* quote path as relative to the given prefix */
char *quote_path_relative(const char *in, int len,
			  struct strbuf *out, const char *prefix);

/* quoting as a string literal for other languages */
extern void perl_quote_print(FILE *stream, const char *src);
extern void python_quote_print(FILE *stream, const char *src);
extern void tcl_quote_print(FILE *stream, const char *src);

#endif
diff --git a/tools/perf/util/rbtree.c b/tools/perf/util/rbtree.c new file mode 100644 index 000000000000..b15ba9c7cb3f --- /dev/null +++ b/tools/perf/util/rbtree.c | |||
@@ -0,0 +1,383 @@ | |||
1 | /* | ||
2 | Red Black Trees | ||
3 | (C) 1999 Andrea Arcangeli <andrea@suse.de> | ||
4 | (C) 2002 David Woodhouse <dwmw2@infradead.org> | ||
5 | |||
6 | This program is free software; you can redistribute it and/or modify | ||
7 | it under the terms of the GNU General Public License as published by | ||
8 | the Free Software Foundation; either version 2 of the License, or | ||
9 | (at your option) any later version. | ||
10 | |||
11 | This program is distributed in the hope that it will be useful, | ||
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | GNU General Public License for more details. | ||
15 | |||
16 | You should have received a copy of the GNU General Public License | ||
17 | along with this program; if not, write to the Free Software | ||
18 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | |||
20 | linux/lib/rbtree.c | ||
21 | */ | ||
22 | |||
23 | #include "rbtree.h" | ||
24 | |||
25 | static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) | ||
26 | { | ||
27 | struct rb_node *right = node->rb_right; | ||
28 | struct rb_node *parent = rb_parent(node); | ||
29 | |||
30 | if ((node->rb_right = right->rb_left)) | ||
31 | rb_set_parent(right->rb_left, node); | ||
32 | right->rb_left = node; | ||
33 | |||
34 | rb_set_parent(right, parent); | ||
35 | |||
36 | if (parent) | ||
37 | { | ||
38 | if (node == parent->rb_left) | ||
39 | parent->rb_left = right; | ||
40 | else | ||
41 | parent->rb_right = right; | ||
42 | } | ||
43 | else | ||
44 | root->rb_node = right; | ||
45 | rb_set_parent(node, right); | ||
46 | } | ||
47 | |||
48 | static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) | ||
49 | { | ||
50 | struct rb_node *left = node->rb_left; | ||
51 | struct rb_node *parent = rb_parent(node); | ||
52 | |||
53 | if ((node->rb_left = left->rb_right)) | ||
54 | rb_set_parent(left->rb_right, node); | ||
55 | left->rb_right = node; | ||
56 | |||
57 | rb_set_parent(left, parent); | ||
58 | |||
59 | if (parent) | ||
60 | { | ||
61 | if (node == parent->rb_right) | ||
62 | parent->rb_right = left; | ||
63 | else | ||
64 | parent->rb_left = left; | ||
65 | } | ||
66 | else | ||
67 | root->rb_node = left; | ||
68 | rb_set_parent(node, left); | ||
69 | } | ||
70 | |||
71 | void rb_insert_color(struct rb_node *node, struct rb_root *root) | ||
72 | { | ||
73 | struct rb_node *parent, *gparent; | ||
74 | |||
75 | while ((parent = rb_parent(node)) && rb_is_red(parent)) | ||
76 | { | ||
77 | gparent = rb_parent(parent); | ||
78 | |||
79 | if (parent == gparent->rb_left) | ||
80 | { | ||
81 | { | ||
82 | register struct rb_node *uncle = gparent->rb_right; | ||
83 | if (uncle && rb_is_red(uncle)) | ||
84 | { | ||
85 | rb_set_black(uncle); | ||
86 | rb_set_black(parent); | ||
87 | rb_set_red(gparent); | ||
88 | node = gparent; | ||
89 | continue; | ||
90 | } | ||
91 | } | ||
92 | |||
93 | if (parent->rb_right == node) | ||
94 | { | ||
95 | register struct rb_node *tmp; | ||
96 | __rb_rotate_left(parent, root); | ||
97 | tmp = parent; | ||
98 | parent = node; | ||
99 | node = tmp; | ||
100 | } | ||
101 | |||
102 | rb_set_black(parent); | ||
103 | rb_set_red(gparent); | ||
104 | __rb_rotate_right(gparent, root); | ||
105 | } else { | ||
106 | { | ||
107 | register struct rb_node *uncle = gparent->rb_left; | ||
108 | if (uncle && rb_is_red(uncle)) | ||
109 | { | ||
110 | rb_set_black(uncle); | ||
111 | rb_set_black(parent); | ||
112 | rb_set_red(gparent); | ||
113 | node = gparent; | ||
114 | continue; | ||
115 | } | ||
116 | } | ||
117 | |||
118 | if (parent->rb_left == node) | ||
119 | { | ||
120 | register struct rb_node *tmp; | ||
121 | __rb_rotate_right(parent, root); | ||
122 | tmp = parent; | ||
123 | parent = node; | ||
124 | node = tmp; | ||
125 | } | ||
126 | |||
127 | rb_set_black(parent); | ||
128 | rb_set_red(gparent); | ||
129 | __rb_rotate_left(gparent, root); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | rb_set_black(root->rb_node); | ||
134 | } | ||
135 | |||
136 | static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, | ||
137 | struct rb_root *root) | ||
138 | { | ||
139 | struct rb_node *other; | ||
140 | |||
141 | while ((!node || rb_is_black(node)) && node != root->rb_node) | ||
142 | { | ||
143 | if (parent->rb_left == node) | ||
144 | { | ||
145 | other = parent->rb_right; | ||
146 | if (rb_is_red(other)) | ||
147 | { | ||
148 | rb_set_black(other); | ||
149 | rb_set_red(parent); | ||
150 | __rb_rotate_left(parent, root); | ||
151 | other = parent->rb_right; | ||
152 | } | ||
153 | if ((!other->rb_left || rb_is_black(other->rb_left)) && | ||
154 | (!other->rb_right || rb_is_black(other->rb_right))) | ||
155 | { | ||
156 | rb_set_red(other); | ||
157 | node = parent; | ||
158 | parent = rb_parent(node); | ||
159 | } | ||
160 | else | ||
161 | { | ||
162 | if (!other->rb_right || rb_is_black(other->rb_right)) | ||
163 | { | ||
164 | rb_set_black(other->rb_left); | ||
165 | rb_set_red(other); | ||
166 | __rb_rotate_right(other, root); | ||
167 | other = parent->rb_right; | ||
168 | } | ||
169 | rb_set_color(other, rb_color(parent)); | ||
170 | rb_set_black(parent); | ||
171 | rb_set_black(other->rb_right); | ||
172 | __rb_rotate_left(parent, root); | ||
173 | node = root->rb_node; | ||
174 | break; | ||
175 | } | ||
176 | } | ||
177 | else | ||
178 | { | ||
179 | other = parent->rb_left; | ||
180 | if (rb_is_red(other)) | ||
181 | { | ||
182 | rb_set_black(other); | ||
183 | rb_set_red(parent); | ||
184 | __rb_rotate_right(parent, root); | ||
185 | other = parent->rb_left; | ||
186 | } | ||
187 | if ((!other->rb_left || rb_is_black(other->rb_left)) && | ||
188 | (!other->rb_right || rb_is_black(other->rb_right))) | ||
189 | { | ||
190 | rb_set_red(other); | ||
191 | node = parent; | ||
192 | parent = rb_parent(node); | ||
193 | } | ||
194 | else | ||
195 | { | ||
196 | if (!other->rb_left || rb_is_black(other->rb_left)) | ||
197 | { | ||
198 | rb_set_black(other->rb_right); | ||
199 | rb_set_red(other); | ||
200 | __rb_rotate_left(other, root); | ||
201 | other = parent->rb_left; | ||
202 | } | ||
203 | rb_set_color(other, rb_color(parent)); | ||
204 | rb_set_black(parent); | ||
205 | rb_set_black(other->rb_left); | ||
206 | __rb_rotate_right(parent, root); | ||
207 | node = root->rb_node; | ||
208 | break; | ||
209 | } | ||
210 | } | ||
211 | } | ||
212 | if (node) | ||
213 | rb_set_black(node); | ||
214 | } | ||
215 | |||
216 | void rb_erase(struct rb_node *node, struct rb_root *root) | ||
217 | { | ||
218 | struct rb_node *child, *parent; | ||
219 | int color; | ||
220 | |||
221 | if (!node->rb_left) | ||
222 | child = node->rb_right; | ||
223 | else if (!node->rb_right) | ||
224 | child = node->rb_left; | ||
225 | else | ||
226 | { | ||
227 | struct rb_node *old = node, *left; | ||
228 | |||
229 | node = node->rb_right; | ||
230 | while ((left = node->rb_left) != NULL) | ||
231 | node = left; | ||
232 | child = node->rb_right; | ||
233 | parent = rb_parent(node); | ||
234 | color = rb_color(node); | ||
235 | |||
236 | if (child) | ||
237 | rb_set_parent(child, parent); | ||
238 | if (parent == old) { | ||
239 | parent->rb_right = child; | ||
240 | parent = node; | ||
241 | } else | ||
242 | parent->rb_left = child; | ||
243 | |||
244 | node->rb_parent_color = old->rb_parent_color; | ||
245 | node->rb_right = old->rb_right; | ||
246 | node->rb_left = old->rb_left; | ||
247 | |||
248 | if (rb_parent(old)) | ||
249 | { | ||
250 | if (rb_parent(old)->rb_left == old) | ||
251 | rb_parent(old)->rb_left = node; | ||
252 | else | ||
253 | rb_parent(old)->rb_right = node; | ||
254 | } else | ||
255 | root->rb_node = node; | ||
256 | |||
257 | rb_set_parent(old->rb_left, node); | ||
258 | if (old->rb_right) | ||
259 | rb_set_parent(old->rb_right, node); | ||
260 | goto color; | ||
261 | } | ||
262 | |||
263 | parent = rb_parent(node); | ||
264 | color = rb_color(node); | ||
265 | |||
266 | if (child) | ||
267 | rb_set_parent(child, parent); | ||
268 | if (parent) | ||
269 | { | ||
270 | if (parent->rb_left == node) | ||
271 | parent->rb_left = child; | ||
272 | else | ||
273 | parent->rb_right = child; | ||
274 | } | ||
275 | else | ||
276 | root->rb_node = child; | ||
277 | |||
278 | color: | ||
279 | if (color == RB_BLACK) | ||
280 | __rb_erase_color(child, parent, root); | ||
281 | } | ||
282 | |||
283 | /* | ||
284 | * This function returns the first node (in sort order) of the tree. | ||
285 | */ | ||
286 | struct rb_node *rb_first(const struct rb_root *root) | ||
287 | { | ||
288 | struct rb_node *n; | ||
289 | |||
290 | n = root->rb_node; | ||
291 | if (!n) | ||
292 | return NULL; | ||
293 | while (n->rb_left) | ||
294 | n = n->rb_left; | ||
295 | return n; | ||
296 | } | ||
297 | |||
298 | struct rb_node *rb_last(const struct rb_root *root) | ||
299 | { | ||
300 | struct rb_node *n; | ||
301 | |||
302 | n = root->rb_node; | ||
303 | if (!n) | ||
304 | return NULL; | ||
305 | while (n->rb_right) | ||
306 | n = n->rb_right; | ||
307 | return n; | ||
308 | } | ||
309 | |||
310 | struct rb_node *rb_next(const struct rb_node *node) | ||
311 | { | ||
312 | struct rb_node *parent; | ||
313 | |||
314 | if (rb_parent(node) == node) | ||
315 | return NULL; | ||
316 | |||
317 | /* If we have a right-hand child, go down and then left as far | ||
318 | as we can. */ | ||
319 | if (node->rb_right) { | ||
320 | node = node->rb_right; | ||
321 | while (node->rb_left) | ||
322 | node=node->rb_left; | ||
323 | return (struct rb_node *)node; | ||
324 | } | ||
325 | |||
326 | /* No right-hand children. Everything down and left is | ||
327 | smaller than us, so any 'next' node must be in the general | ||
328 | direction of our parent. Go up the tree; any time the | ||
329 | ancestor is a right-hand child of its parent, keep going | ||
330 | up. First time it's a left-hand child of its parent, said | ||
331 | parent is our 'next' node. */ | ||
332 | while ((parent = rb_parent(node)) && node == parent->rb_right) | ||
333 | node = parent; | ||
334 | |||
335 | return parent; | ||
336 | } | ||
337 | |||
338 | struct rb_node *rb_prev(const struct rb_node *node) | ||
339 | { | ||
340 | struct rb_node *parent; | ||
341 | |||
342 | if (rb_parent(node) == node) | ||
343 | return NULL; | ||
344 | |||
345 | /* If we have a left-hand child, go down and then right as far | ||
346 | as we can. */ | ||
347 | if (node->rb_left) { | ||
348 | node = node->rb_left; | ||
349 | while (node->rb_right) | ||
350 | node=node->rb_right; | ||
351 | return (struct rb_node *)node; | ||
352 | } | ||
353 | |||
354 | /* No left-hand children. Go up till we find an ancestor which | ||
355 | is a right-hand child of its parent */ | ||
356 | while ((parent = rb_parent(node)) && node == parent->rb_left) | ||
357 | node = parent; | ||
358 | |||
359 | return parent; | ||
360 | } | ||
361 | |||
362 | void rb_replace_node(struct rb_node *victim, struct rb_node *new, | ||
363 | struct rb_root *root) | ||
364 | { | ||
365 | struct rb_node *parent = rb_parent(victim); | ||
366 | |||
367 | /* Set the surrounding nodes to point to the replacement */ | ||
368 | if (parent) { | ||
369 | if (victim == parent->rb_left) | ||
370 | parent->rb_left = new; | ||
371 | else | ||
372 | parent->rb_right = new; | ||
373 | } else { | ||
374 | root->rb_node = new; | ||
375 | } | ||
376 | if (victim->rb_left) | ||
377 | rb_set_parent(victim->rb_left, new); | ||
378 | if (victim->rb_right) | ||
379 | rb_set_parent(victim->rb_right, new); | ||
380 | |||
381 | /* Copy the pointers/colour from the victim to the replacement */ | ||
382 | *new = *victim; | ||
383 | } | ||
diff --git a/tools/perf/util/rbtree.h b/tools/perf/util/rbtree.h new file mode 100644 index 000000000000..6bdc488a47fb --- /dev/null +++ b/tools/perf/util/rbtree.h | |||
@@ -0,0 +1,171 @@ | |||
1 | /* | ||
2 | Red Black Trees | ||
3 | (C) 1999 Andrea Arcangeli <andrea@suse.de> | ||
4 | |||
5 | This program is free software; you can redistribute it and/or modify | ||
6 | it under the terms of the GNU General Public License as published by | ||
7 | the Free Software Foundation; either version 2 of the License, or | ||
8 | (at your option) any later version. | ||
9 | |||
10 | This program is distributed in the hope that it will be useful, | ||
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | GNU General Public License for more details. | ||
14 | |||
15 | You should have received a copy of the GNU General Public License | ||
16 | along with this program; if not, write to the Free Software | ||
17 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 | |||
19 | linux/include/linux/rbtree.h | ||
20 | |||
21 | To use rbtrees you'll have to implement your own insert and search cores. | ||
22 | This avoids the use of callbacks, which would hurt performance dramatically. | ||
23 | I know it's not the cleaner way, but in C (not in C++) to get | ||
24 | performances and genericity... | ||
25 | |||
26 | Some example of insert and search follows here. The search is a plain | ||
27 | normal search over an ordered tree. The insert instead must be implemented | ||
28 | in two steps: as first thing the code must insert the element in | ||
29 | order as a red leaf in the tree, then the support library function | ||
30 | rb_insert_color() must be called. Such function will do the | ||
31 | not trivial work to rebalance the rbtree if necessary. | ||
32 | |||
33 | ----------------------------------------------------------------------- | ||
34 | static inline struct page * rb_search_page_cache(struct inode * inode, | ||
35 | unsigned long offset) | ||
36 | { | ||
37 | struct rb_node * n = inode->i_rb_page_cache.rb_node; | ||
38 | struct page * page; | ||
39 | |||
40 | while (n) | ||
41 | { | ||
42 | page = rb_entry(n, struct page, rb_page_cache); | ||
43 | |||
44 | if (offset < page->offset) | ||
45 | n = n->rb_left; | ||
46 | else if (offset > page->offset) | ||
47 | n = n->rb_right; | ||
48 | else | ||
49 | return page; | ||
50 | } | ||
51 | return NULL; | ||
52 | } | ||
53 | |||
54 | static inline struct page * __rb_insert_page_cache(struct inode * inode, | ||
55 | unsigned long offset, | ||
56 | struct rb_node * node) | ||
57 | { | ||
58 | struct rb_node ** p = &inode->i_rb_page_cache.rb_node; | ||
59 | struct rb_node * parent = NULL; | ||
60 | struct page * page; | ||
61 | |||
62 | while (*p) | ||
63 | { | ||
64 | parent = *p; | ||
65 | page = rb_entry(parent, struct page, rb_page_cache); | ||
66 | |||
67 | if (offset < page->offset) | ||
68 | p = &(*p)->rb_left; | ||
69 | else if (offset > page->offset) | ||
70 | p = &(*p)->rb_right; | ||
71 | else | ||
72 | return page; | ||
73 | } | ||
74 | |||
75 | rb_link_node(node, parent, p); | ||
76 | |||
77 | return NULL; | ||
78 | } | ||
79 | |||
80 | static inline struct page * rb_insert_page_cache(struct inode * inode, | ||
81 | unsigned long offset, | ||
82 | struct rb_node * node) | ||
83 | { | ||
84 | struct page * ret; | ||
85 | if ((ret = __rb_insert_page_cache(inode, offset, node))) | ||
86 | goto out; | ||
87 | rb_insert_color(node, &inode->i_rb_page_cache); | ||
88 | out: | ||
89 | return ret; | ||
90 | } | ||
91 | ----------------------------------------------------------------------- | ||
92 | */ | ||
93 | |||
94 | #ifndef _LINUX_RBTREE_H | ||
95 | #define _LINUX_RBTREE_H | ||
96 | |||
97 | #include <stddef.h> | ||
98 | |||
/**
 * container_of - cast a member of a structure out to the containing structure
 * @ptr:	the pointer to the member.
 * @type:	the type of the container struct this is embedded in.
 * @member:	the name of the member within the struct.
 *
 * The temporary __mptr makes the compiler check that @ptr really has the
 * type of @member.  __typeof__ is spelled with underscores because the
 * plain typeof keyword is not recognised in strict ISO modes such as
 * -std=c99 or -std=c11.
 */
#define container_of(ptr, type, member) ({			\
	const __typeof__( ((type *)0)->member ) *__mptr = (ptr);	\
	(type *)( (char *)__mptr - offsetof(type,member) );})
109 | |||
/* Colour values stored in bit 0 of rb_node.rb_parent_color. */
#define RB_RED		0
#define RB_BLACK	1

/*
 * One tree node, meant to be embedded in the structure being indexed.
 * rb_parent_color holds the parent pointer with the colour packed into
 * its low bits.
 */
struct rb_node {
	unsigned long rb_parent_color;
	struct rb_node *rb_right;
	struct rb_node *rb_left;
} __attribute__((aligned(sizeof(long))));
/* The alignment might seem pointless, but allegedly CRIS needs it */
119 | |||
/* Handle for a whole tree: points at the top node, NULL when empty. */
struct rb_root {
	struct rb_node *rb_node;
};
124 | |||
125 | |||
/*
 * Accessors for the packed rb_parent_color word: the two low bits are
 * masked off to recover the parent pointer, and bit 0 is the colour
 * (0 = red, 1 = black).
 */
#define rb_parent(r)	((struct rb_node *)((r)->rb_parent_color & ~3))
#define rb_color(r)	((r)->rb_parent_color & 1)

#define rb_is_red(r)	(!rb_color(r))
#define rb_is_black(r)	rb_color(r)

#define rb_set_red(r)	do { (r)->rb_parent_color &= ~1; } while (0)
#define rb_set_black(r)	do { (r)->rb_parent_color |= 1; } while (0)
132 | |||
133 | static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) | ||
134 | { | ||
135 | rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; | ||
136 | } | ||
137 | static inline void rb_set_color(struct rb_node *rb, int color) | ||
138 | { | ||
139 | rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; | ||
140 | } | ||
141 | |||
/* Initializer for an empty tree. */
#define RB_ROOT	(struct rb_root) { NULL, }
/* Recover the structure that embeds the rb_node pointed to by ptr. */
#define	rb_entry(ptr, type, member) container_of(ptr, type, member)

/* A tree is empty when its root pointer is NULL. */
#define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
/*
 * A node is marked as "not in any tree" by making it its own parent.
 * The arguments are parenthesized so that expression arguments (for
 * example a conditional expression) expand as intended.
 */
#define RB_EMPTY_NODE(node)	(rb_parent(node) == (node))
#define RB_CLEAR_NODE(node)	(rb_set_parent((node), (node)))
148 | |||
149 | extern void rb_insert_color(struct rb_node *, struct rb_root *); | ||
150 | extern void rb_erase(struct rb_node *, struct rb_root *); | ||
151 | |||
152 | /* Find logical next and previous nodes in a tree */ | ||
153 | extern struct rb_node *rb_next(const struct rb_node *); | ||
154 | extern struct rb_node *rb_prev(const struct rb_node *); | ||
155 | extern struct rb_node *rb_first(const struct rb_root *); | ||
156 | extern struct rb_node *rb_last(const struct rb_root *); | ||
157 | |||
158 | /* Fast replacement of a single node without remove/rebalance/add/rebalance */ | ||
159 | extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, | ||
160 | struct rb_root *root); | ||
161 | |||
162 | static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, | ||
163 | struct rb_node ** rb_link) | ||
164 | { | ||
165 | node->rb_parent_color = (unsigned long )parent; | ||
166 | node->rb_left = node->rb_right = NULL; | ||
167 | |||
168 | *rb_link = node; | ||
169 | } | ||
170 | |||
171 | #endif /* _LINUX_RBTREE_H */ | ||
diff --git a/tools/perf/util/run-command.c b/tools/perf/util/run-command.c new file mode 100644 index 000000000000..b2f5e854f40a --- /dev/null +++ b/tools/perf/util/run-command.c | |||
@@ -0,0 +1,395 @@ | |||
1 | #include "cache.h" | ||
2 | #include "run-command.h" | ||
3 | #include "exec_cmd.h" | ||
4 | |||
/* Close both descriptors of a pipe pair; close() errors are ignored. */
static inline void close_pair(int fd[2])
{
	int i;

	for (i = 0; i < 2; i++)
		close(fd[i]);
}
10 | |||
/*
 * Make descriptor `to` refer to /dev/null (opened read/write).
 *
 * open() returns the lowest free descriptor, so when `to` itself is
 * currently closed the new descriptor may already be `to`.  In that case
 * there is nothing to duplicate and, crucially, nothing to close: closing
 * it would leave `to` closed again.  If /dev/null cannot be opened the
 * function returns without touching `to`.
 */
static inline void dup_devnull(int to)
{
	int fd = open("/dev/null", O_RDWR);

	if (fd < 0)
		return;
	if (fd == to)
		return;		/* already in place; keep it open */
	dup2(fd, to);
	close(fd);
}
17 | |||
/*
 * Start the child process described by cmd.
 *
 * For each of stdin/stdout/stderr a pipe is created when the matching
 * descriptor in cmd (->in, ->out, ->err) is negative and the channel is not
 * disabled by ->no_stdin / ->no_stdout / ->no_stderr (or, for stdout,
 * redirected by ->stdout_to_stderr); the parent's end of that pipe is stored
 * back into cmd.
 *
 * Returns 0 on success, -ERR_RUN_COMMAND_PIPE when pipe() fails, and
 * -ERR_RUN_COMMAND_EXEC or -ERR_RUN_COMMAND_FORK when no child could be
 * created (cmd->pid < 0).
 */
18 | int start_command(struct child_process *cmd) | ||
19 | { | ||
20 | int need_in, need_out, need_err; | ||
21 | int fdin[2], fdout[2], fderr[2]; | ||
22 | |||
23 | /* | ||
24 | * In case of errors we must keep the promise to close FDs | ||
25 | * that have been passed in via ->in and ->out. | ||
26 | */ | ||
27 | |||
/* stdin: a pipe is needed unless stdin is disabled or the caller supplied an FD */
28 | need_in = !cmd->no_stdin && cmd->in < 0; | ||
29 | if (need_in) { | ||
30 | if (pipe(fdin) < 0) { | ||
31 | if (cmd->out > 0) | ||
32 | close(cmd->out); | ||
33 | return -ERR_RUN_COMMAND_PIPE; | ||
34 | } | ||
/* the parent keeps the write end */
35 | cmd->in = fdin[1]; | ||
36 | } | ||
37 | |||
/* stdout: same idea, additionally skipped when stdout is sent to stderr */
38 | need_out = !cmd->no_stdout | ||
39 | && !cmd->stdout_to_stderr | ||
40 | && cmd->out < 0; | ||
41 | if (need_out) { | ||
42 | if (pipe(fdout) < 0) { | ||
43 | if (need_in) | ||
44 | close_pair(fdin); | ||
45 | else if (cmd->in) | ||
46 | close(cmd->in); | ||
47 | return -ERR_RUN_COMMAND_PIPE; | ||
48 | } | ||
/* the parent keeps the read end */
49 | cmd->out = fdout[0]; | ||
50 | } | ||
51 | |||
52 | need_err = !cmd->no_stderr && cmd->err < 0; | ||
53 | if (need_err) { | ||
54 | if (pipe(fderr) < 0) { | ||
55 | if (need_in) | ||
56 | close_pair(fdin); | ||
57 | else if (cmd->in) | ||
58 | close(cmd->in); | ||
59 | if (need_out) | ||
60 | close_pair(fdout); | ||
61 | else if (cmd->out) | ||
62 | close(cmd->out); | ||
63 | return -ERR_RUN_COMMAND_PIPE; | ||
64 | } | ||
/* the parent keeps the read end */
65 | cmd->err = fderr[0]; | ||
66 | } | ||
67 | |||
/* Non-MinGW path: fork, wire up the child's descriptors, then exec. */
68 | #ifndef __MINGW32__ | ||
/* flush every stdio stream before fork() so buffered data is not cloned */
69 | fflush(NULL); | ||
70 | cmd->pid = fork(); | ||
/* pid 0: we are the child */
71 | if (!cmd->pid) { | ||
72 | if (cmd->no_stdin) | ||
73 | dup_devnull(0); | ||
74 | else if (need_in) { | ||
75 | dup2(fdin[0], 0); | ||
76 | close_pair(fdin); | ||
77 | } else if (cmd->in) { | ||
78 | dup2(cmd->in, 0); | ||
79 | close(cmd->in); | ||
80 | } | ||
81 | |||
/* stderr is set up before stdout so that stdout_to_stderr can copy FD 2 */
82 | if (cmd->no_stderr) | ||
83 | dup_devnull(2); | ||
84 | else if (need_err) { | ||
85 | dup2(fderr[1], 2); | ||
86 | close_pair(fderr); | ||
87 | } | ||
88 | |||
89 | if (cmd->no_stdout) | ||
90 | dup_devnull(1); | ||
91 | else if (cmd->stdout_to_stderr) | ||
92 | dup2(2, 1); | ||
93 | else if (need_out) { | ||
94 | dup2(fdout[1], 1); | ||
95 | close_pair(fdout); | ||
96 | } else if (cmd->out > 1) { | ||
97 | dup2(cmd->out, 1); | ||
98 | close(cmd->out); | ||
99 | } | ||
100 | |||
101 | if (cmd->dir && chdir(cmd->dir)) | ||
102 | die("exec %s: cd to %s failed (%s)", cmd->argv[0], | ||
103 | cmd->dir, strerror(errno)); | ||
/* entries containing '=' are set, all other entries name variables to unset */
104 | if (cmd->env) { | ||
105 | for (; *cmd->env; cmd->env++) { | ||
106 | if (strchr(*cmd->env, '=')) | ||
107 | putenv((char*)*cmd->env); | ||
108 | else | ||
109 | unsetenv(*cmd->env); | ||
110 | } | ||
111 | } | ||
112 | if (cmd->preexec_cb) | ||
113 | cmd->preexec_cb(); | ||
114 | if (cmd->perf_cmd) { | ||
115 | execv_perf_cmd(cmd->argv); | ||
116 | } else { | ||
117 | execvp(cmd->argv[0], (char *const*) cmd->argv); | ||
118 | } | ||
/* reached only when exec failed; wait_or_whine() maps exit status 127 to -ERR_RUN_COMMAND_EXEC */
119 | exit(127); | ||
120 | } | ||
/* MinGW path: redirect our own FDs 0/1/2, spawn the child, then restore them. */
121 | #else | ||
122 | int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ | ||
123 | const char **sargv = cmd->argv; | ||
124 | char **env = environ; | ||
125 | |||
126 | if (cmd->no_stdin) { | ||
127 | s0 = dup(0); | ||
128 | dup_devnull(0); | ||
129 | } else if (need_in) { | ||
130 | s0 = dup(0); | ||
131 | dup2(fdin[0], 0); | ||
132 | } else if (cmd->in) { | ||
133 | s0 = dup(0); | ||
134 | dup2(cmd->in, 0); | ||
135 | } | ||
136 | |||
137 | if (cmd->no_stderr) { | ||
138 | s2 = dup(2); | ||
139 | dup_devnull(2); | ||
140 | } else if (need_err) { | ||
141 | s2 = dup(2); | ||
142 | dup2(fderr[1], 2); | ||
143 | } | ||
144 | |||
145 | if (cmd->no_stdout) { | ||
146 | s1 = dup(1); | ||
147 | dup_devnull(1); | ||
148 | } else if (cmd->stdout_to_stderr) { | ||
149 | s1 = dup(1); | ||
150 | dup2(2, 1); | ||
151 | } else if (need_out) { | ||
152 | s1 = dup(1); | ||
153 | dup2(fdout[1], 1); | ||
154 | } else if (cmd->out > 1) { | ||
155 | s1 = dup(1); | ||
156 | dup2(cmd->out, 1); | ||
157 | } | ||
158 | |||
159 | if (cmd->dir) | ||
160 | die("chdir in start_command() not implemented"); | ||
161 | if (cmd->env) { | ||
162 | env = copy_environ(); | ||
163 | for (; *cmd->env; cmd->env++) | ||
164 | env = env_setenv(env, *cmd->env); | ||
165 | } | ||
166 | |||
167 | if (cmd->perf_cmd) { | ||
168 | cmd->argv = prepare_perf_cmd(cmd->argv); | ||
169 | } | ||
170 | |||
171 | cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); | ||
172 | |||
173 | if (cmd->env) | ||
174 | free_environ(env); | ||
175 | if (cmd->perf_cmd) | ||
176 | free(cmd->argv); | ||
177 | |||
/* put back the caller's argv and our own original std descriptors */
178 | cmd->argv = sargv; | ||
179 | if (s0 >= 0) | ||
180 | dup2(s0, 0), close(s0); | ||
181 | if (s1 >= 0) | ||
182 | dup2(s1, 1), close(s1); | ||
183 | if (s2 >= 0) | ||
184 | dup2(s2, 2), close(s2); | ||
185 | #endif | ||
186 | |||
/* no child was created: release every descriptor before reporting the failure */
187 | if (cmd->pid < 0) { | ||
188 | int err = errno; | ||
189 | if (need_in) | ||
190 | close_pair(fdin); | ||
191 | else if (cmd->in) | ||
192 | close(cmd->in); | ||
193 | if (need_out) | ||
194 | close_pair(fdout); | ||
195 | else if (cmd->out) | ||
196 | close(cmd->out); | ||
197 | if (need_err) | ||
198 | close_pair(fderr); | ||
199 | return err == ENOENT ? | ||
200 | -ERR_RUN_COMMAND_EXEC : | ||
201 | -ERR_RUN_COMMAND_FORK; | ||
202 | } | ||
203 | |||
/* success: close the pipe ends handed to the child and any caller-supplied FDs */
204 | if (need_in) | ||
205 | close(fdin[0]); | ||
206 | else if (cmd->in) | ||
207 | close(cmd->in); | ||
208 | |||
209 | if (need_out) | ||
210 | close(fdout[1]); | ||
211 | else if (cmd->out) | ||
212 | close(cmd->out); | ||
213 | |||
214 | if (need_err) | ||
215 | close(fderr[1]); | ||
216 | |||
217 | return 0; | ||
218 | } | ||
219 | |||
220 | static int wait_or_whine(pid_t pid) | ||
221 | { | ||
222 | for (;;) { | ||
223 | int status, code; | ||
224 | pid_t waiting = waitpid(pid, &status, 0); | ||
225 | |||
226 | if (waiting < 0) { | ||
227 | if (errno == EINTR) | ||
228 | continue; | ||
229 | error("waitpid failed (%s)", strerror(errno)); | ||
230 | return -ERR_RUN_COMMAND_WAITPID; | ||
231 | } | ||
232 | if (waiting != pid) | ||
233 | return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; | ||
234 | if (WIFSIGNALED(status)) | ||
235 | return -ERR_RUN_COMMAND_WAITPID_SIGNAL; | ||
236 | |||
237 | if (!WIFEXITED(status)) | ||
238 | return -ERR_RUN_COMMAND_WAITPID_NOEXIT; | ||
239 | code = WEXITSTATUS(status); | ||
240 | switch (code) { | ||
241 | case 127: | ||
242 | return -ERR_RUN_COMMAND_EXEC; | ||
243 | case 0: | ||
244 | return 0; | ||
245 | default: | ||
246 | return -code; | ||
247 | } | ||
248 | } | ||
249 | } | ||
250 | |||
251 | int finish_command(struct child_process *cmd) | ||
252 | { | ||
253 | return wait_or_whine(cmd->pid); | ||
254 | } | ||
255 | |||
/*
 * Start cmd and wait for it to finish.  A non-zero result from
 * start_command() is returned as is; otherwise the result of
 * finish_command() is returned.
 */
int run_command(struct child_process *cmd)
{
	int started = start_command(cmd);

	return started ? started : finish_command(cmd);
}
263 | |||
264 | static void prepare_run_command_v_opt(struct child_process *cmd, | ||
265 | const char **argv, | ||
266 | int opt) | ||
267 | { | ||
268 | memset(cmd, 0, sizeof(*cmd)); | ||
269 | cmd->argv = argv; | ||
270 | cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; | ||
271 | cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; | ||
272 | cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0; | ||
273 | } | ||
274 | |||
275 | int run_command_v_opt(const char **argv, int opt) | ||
276 | { | ||
277 | struct child_process cmd; | ||
278 | prepare_run_command_v_opt(&cmd, argv, opt); | ||
279 | return run_command(&cmd); | ||
280 | } | ||
281 | |||
282 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) | ||
283 | { | ||
284 | struct child_process cmd; | ||
285 | prepare_run_command_v_opt(&cmd, argv, opt); | ||
286 | cmd.dir = dir; | ||
287 | cmd.env = env; | ||
288 | return run_command(&cmd); | ||
289 | } | ||
290 | |||
#ifdef __MINGW32__
/* Thread entry point used by start_async(): run the async job's proc on its pipe end. */
static __stdcall unsigned run_thread(void *data)
{
	struct async *job = data;
	int fd = job->fd_for_proc;

	return job->proc(fd, job->data);
}
#endif
298 | |||
299 | int start_async(struct async *async) | ||
300 | { | ||
301 | int pipe_out[2]; | ||
302 | |||
303 | if (pipe(pipe_out) < 0) | ||
304 | return error("cannot create pipe: %s", strerror(errno)); | ||
305 | async->out = pipe_out[0]; | ||
306 | |||
307 | #ifndef __MINGW32__ | ||
308 | /* Flush stdio before fork() to avoid cloning buffers */ | ||
309 | fflush(NULL); | ||
310 | |||
311 | async->pid = fork(); | ||
312 | if (async->pid < 0) { | ||
313 | error("fork (async) failed: %s", strerror(errno)); | ||
314 | close_pair(pipe_out); | ||
315 | return -1; | ||
316 | } | ||
317 | if (!async->pid) { | ||
318 | close(pipe_out[0]); | ||
319 | exit(!!async->proc(pipe_out[1], async->data)); | ||
320 | } | ||
321 | close(pipe_out[1]); | ||
322 | #else | ||
323 | async->fd_for_proc = pipe_out[1]; | ||
324 | async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); | ||
325 | if (!async->tid) { | ||
326 | error("cannot create thread: %s", strerror(errno)); | ||
327 | close_pair(pipe_out); | ||
328 | return -1; | ||
329 | } | ||
330 | #endif | ||
331 | return 0; | ||
332 | } | ||
333 | |||
334 | int finish_async(struct async *async) | ||
335 | { | ||
336 | #ifndef __MINGW32__ | ||
337 | int ret = 0; | ||
338 | |||
339 | if (wait_or_whine(async->pid)) | ||
340 | ret = error("waitpid (async) failed"); | ||
341 | #else | ||
342 | DWORD ret = 0; | ||
343 | if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) | ||
344 | ret = error("waiting for thread failed: %lu", GetLastError()); | ||
345 | else if (!GetExitCodeThread(async->tid, &ret)) | ||
346 | ret = error("cannot get thread exit code: %lu", GetLastError()); | ||
347 | CloseHandle(async->tid); | ||
348 | #endif | ||
349 | return ret; | ||
350 | } | ||
351 | |||
352 | int run_hook(const char *index_file, const char *name, ...) | ||
353 | { | ||
354 | struct child_process hook; | ||
355 | const char **argv = NULL, *env[2]; | ||
356 | char index[PATH_MAX]; | ||
357 | va_list args; | ||
358 | int ret; | ||
359 | size_t i = 0, alloc = 0; | ||
360 | |||
361 | if (access(perf_path("hooks/%s", name), X_OK) < 0) | ||
362 | return 0; | ||
363 | |||
364 | va_start(args, name); | ||
365 | ALLOC_GROW(argv, i + 1, alloc); | ||
366 | argv[i++] = perf_path("hooks/%s", name); | ||
367 | while (argv[i-1]) { | ||
368 | ALLOC_GROW(argv, i + 1, alloc); | ||
369 | argv[i++] = va_arg(args, const char *); | ||
370 | } | ||
371 | va_end(args); | ||
372 | |||
373 | memset(&hook, 0, sizeof(hook)); | ||
374 | hook.argv = argv; | ||
375 | hook.no_stdin = 1; | ||
376 | hook.stdout_to_stderr = 1; | ||
377 | if (index_file) { | ||
378 | snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); | ||
379 | env[0] = index; | ||
380 | env[1] = NULL; | ||
381 | hook.env = env; | ||
382 | } | ||
383 | |||
384 | ret = start_command(&hook); | ||
385 | free(argv); | ||
386 | if (ret) { | ||
387 | warning("Could not spawn %s", argv[0]); | ||
388 | return ret; | ||
389 | } | ||
390 | ret = finish_command(&hook); | ||
391 | if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) | ||
392 | warning("%s exited due to uncaught signal", argv[0]); | ||
393 | |||
394 | return ret; | ||
395 | } | ||
diff --git a/tools/perf/util/run-command.h b/tools/perf/util/run-command.h new file mode 100644 index 000000000000..328289f23669 --- /dev/null +++ b/tools/perf/util/run-command.h | |||
@@ -0,0 +1,93 @@ | |||
1 | #ifndef RUN_COMMAND_H | ||
2 | #define RUN_COMMAND_H | ||
3 | |||
/*
 * Failure codes of the run-command functions.  The functions return them
 * negated (e.g. -ERR_RUN_COMMAND_PIPE); the values start at 10000.
 */
4 | enum { | ||
5 | ERR_RUN_COMMAND_FORK = 10000, | ||
6 | ERR_RUN_COMMAND_EXEC, | ||
7 | ERR_RUN_COMMAND_PIPE, | ||
8 | ERR_RUN_COMMAND_WAITPID, | ||
9 | ERR_RUN_COMMAND_WAITPID_WRONG_PID, | ||
10 | ERR_RUN_COMMAND_WAITPID_SIGNAL, | ||
11 | ERR_RUN_COMMAND_WAITPID_NOEXIT, | ||
12 | }; | ||
/* True for any x <= -ERR_RUN_COMMAND_FORK, i.e. for every negated code above. */
13 | #define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) | ||
14 | |||
/* Description of a child process to be started by start_command(). */
15 | struct child_process { | ||
/* NULL-terminated argument vector; argv[0] is the program to execute */
16 | const char **argv; | ||
/* filled in by start_command(); finish_command() waits for this pid */
17 | pid_t pid; | ||
18 | /* | ||
19 | * Using .in, .out, .err: | ||
20 | * - Specify 0 for no redirections (child inherits stdin, stdout, | ||
21 | * stderr from parent). | ||
22 | * - Specify -1 to have a pipe allocated as follows: | ||
23 | * .in: returns the writable pipe end; parent writes to it, | ||
24 | * the readable pipe end becomes child's stdin | ||
25 | * .out, .err: returns the readable pipe end; parent reads from | ||
26 | * it, the writable pipe end becomes child's stdout/stderr | ||
27 | * The caller of start_command() must close the returned FDs | ||
28 | * after it has completed reading from/writing to it! | ||
29 | * - Specify > 0 to set a channel to a particular FD as follows: | ||
30 | * .in: a readable FD, becomes child's stdin | ||
31 | * .out: a writable FD, becomes child's stdout/stderr | ||
32 | * .err > 0 not supported | ||
33 | * The specified FD is closed by start_command(), even in case | ||
34 | * of errors! | ||
35 | */ | ||
36 | int in; | ||
37 | int out; | ||
38 | int err; | ||
/* if set, the child changes to this directory before executing */
39 | const char *dir; | ||
/* NULL-terminated list: "VAR=VALUE" sets a variable, a bare "VAR" unsets it */
40 | const char *const *env; | ||
/* no_std*: connect the corresponding stream of the child to /dev/null */
41 | unsigned no_stdin:1; | ||
42 | unsigned no_stdout:1; | ||
43 | unsigned no_stderr:1; | ||
44 | unsigned perf_cmd:1; /* if this is to be perf sub-command */ | ||
/* make the child's stdout a duplicate of its stderr */
45 | unsigned stdout_to_stderr:1; | ||
/* optional hook called in the forked child just before the exec */
46 | void (*preexec_cb)(void); | ||
47 | }; | ||
48 | |||
49 | int start_command(struct child_process *); | ||
50 | int finish_command(struct child_process *); | ||
51 | int run_command(struct child_process *); | ||
52 | |||
53 | extern int run_hook(const char *index_file, const char *name, ...); | ||
54 | |||
55 | #define RUN_COMMAND_NO_STDIN 1 | ||
56 | #define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ | ||
57 | #define RUN_COMMAND_STDOUT_TO_STDERR 4 | ||
58 | int run_command_v_opt(const char **argv, int opt); | ||
59 | |||
60 | /* | ||
61 | * env (the environment) is to be formatted like environ: "VAR=VALUE". | ||
62 | * To unset an environment variable use just "VAR". | ||
63 | */ | ||
64 | int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); | ||
65 | |||
66 | /* | ||
67 | * The purpose of the following functions is to feed a pipe by running | ||
68 | * a function asynchronously and providing output that the caller reads. | ||
69 | * | ||
70 | * It is expected that no synchronization and mutual exclusion between | ||
71 | * the caller and the feed function is necessary so that the function | ||
72 | * can run in a thread without interfering with the caller. | ||
73 | */ | ||
/* State of one asynchronous feeder started by start_async() and reaped by finish_async(). */
74 | struct async { | ||
75 | /* | ||
76 | * proc writes to fd and closes it; | ||
77 | * returns 0 on success, non-zero on failure | ||
78 | */ | ||
79 | int (*proc)(int fd, void *data); | ||
/* opaque pointer passed through to proc unchanged */
80 | void *data; | ||
81 | int out; /* caller reads from here and closes it */ | ||
82 | #ifndef __MINGW32__ | ||
/* child process that runs proc */
83 | pid_t pid; | ||
84 | #else | ||
/* thread that runs proc, and the pipe write end handed to it */
85 | HANDLE tid; | ||
86 | int fd_for_proc; | ||
87 | #endif | ||
88 | }; | ||
89 | |||
90 | int start_async(struct async *async); | ||
91 | int finish_async(struct async *async); | ||
92 | |||
93 | #endif | ||
diff --git a/tools/perf/util/sigchain.c b/tools/perf/util/sigchain.c new file mode 100644 index 000000000000..1118b99e57d3 --- /dev/null +++ b/tools/perf/util/sigchain.c | |||
@@ -0,0 +1,52 @@ | |||
1 | #include "sigchain.h" | ||
2 | #include "cache.h" | ||
3 | |||
4 | #define SIGCHAIN_MAX_SIGNALS 32 | ||
5 | |||
/*
 * Per-signal stack of the handlers that were installed before each
 * sigchain_push(): `old` is an array grown with ALLOC_GROW, `n` the number
 * of saved handlers and `alloc` the allocated capacity.
 */
6 | struct sigchain_signal { | ||
7 | sigchain_fun *old; | ||
8 | int n; | ||
9 | int alloc; | ||
10 | }; | ||
/* one stack per signal number, indexed directly by the signal number */
11 | static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS]; | ||
12 | |||
13 | static void check_signum(int sig) | ||
14 | { | ||
15 | if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS) | ||
16 | die("BUG: signal out of range: %d", sig); | ||
17 | } | ||
18 | |||
19 | int sigchain_push(int sig, sigchain_fun f) | ||
20 | { | ||
21 | struct sigchain_signal *s = signals + sig; | ||
22 | check_signum(sig); | ||
23 | |||
24 | ALLOC_GROW(s->old, s->n + 1, s->alloc); | ||
25 | s->old[s->n] = signal(sig, f); | ||
26 | if (s->old[s->n] == SIG_ERR) | ||
27 | return -1; | ||
28 | s->n++; | ||
29 | return 0; | ||
30 | } | ||
31 | |||
32 | int sigchain_pop(int sig) | ||
33 | { | ||
34 | struct sigchain_signal *s = signals + sig; | ||
35 | check_signum(sig); | ||
36 | if (s->n < 1) | ||
37 | return 0; | ||
38 | |||
39 | if (signal(sig, s->old[s->n - 1]) == SIG_ERR) | ||
40 | return -1; | ||
41 | s->n--; | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | void sigchain_push_common(sigchain_fun f) | ||
46 | { | ||
47 | sigchain_push(SIGINT, f); | ||
48 | sigchain_push(SIGHUP, f); | ||
49 | sigchain_push(SIGTERM, f); | ||
50 | sigchain_push(SIGQUIT, f); | ||
51 | sigchain_push(SIGPIPE, f); | ||
52 | } | ||
diff --git a/tools/perf/util/sigchain.h b/tools/perf/util/sigchain.h new file mode 100644 index 000000000000..618083bce0c6 --- /dev/null +++ b/tools/perf/util/sigchain.h | |||
@@ -0,0 +1,11 @@ | |||
1 | #ifndef SIGCHAIN_H | ||
2 | #define SIGCHAIN_H | ||
3 | |||
4 | typedef void (*sigchain_fun)(int); | ||
5 | |||
6 | int sigchain_push(int sig, sigchain_fun f); | ||
7 | int sigchain_pop(int sig); | ||
8 | |||
9 | void sigchain_push_common(sigchain_fun f); | ||
10 | |||
11 | #endif /* SIGCHAIN_H */ | ||
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c new file mode 100644 index 000000000000..eaba09306802 --- /dev/null +++ b/tools/perf/util/strbuf.c | |||
@@ -0,0 +1,359 @@ | |||
1 | #include "cache.h" | ||
2 | |||
/*
 * Compare the start of str against prefix.
 * Returns 0 when str begins with prefix; otherwise the difference between
 * the first mismatching bytes (prefix byte minus str byte, both taken as
 * unsigned char).
 */
int prefixcmp(const char *str, const char *prefix)
{
	while (*prefix) {
		if (*str != *prefix)
			return (unsigned char)*prefix - (unsigned char)*str;
		str++;
		prefix++;
	}
	return 0;
}
11 | |||
12 | /* | ||
13 | * Used as the default ->buf value, so that people can always assume | ||
14 | * buf is non NULL and ->buf is NUL terminated even for a freshly | ||
15 | * initialized strbuf. | ||
16 | */ | ||
17 | char strbuf_slopbuf[1]; | ||
18 | |||
19 | void strbuf_init(struct strbuf *sb, size_t hint) | ||
20 | { | ||
21 | sb->alloc = sb->len = 0; | ||
22 | sb->buf = strbuf_slopbuf; | ||
23 | if (hint) | ||
24 | strbuf_grow(sb, hint); | ||
25 | } | ||
26 | |||
27 | void strbuf_release(struct strbuf *sb) | ||
28 | { | ||
29 | if (sb->alloc) { | ||
30 | free(sb->buf); | ||
31 | strbuf_init(sb, 0); | ||
32 | } | ||
33 | } | ||
34 | |||
35 | char *strbuf_detach(struct strbuf *sb, size_t *sz) | ||
36 | { | ||
37 | char *res = sb->alloc ? sb->buf : NULL; | ||
38 | if (sz) | ||
39 | *sz = sb->len; | ||
40 | strbuf_init(sb, 0); | ||
41 | return res; | ||
42 | } | ||
43 | |||
44 | void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) | ||
45 | { | ||
46 | strbuf_release(sb); | ||
47 | sb->buf = buf; | ||
48 | sb->len = len; | ||
49 | sb->alloc = alloc; | ||
50 | strbuf_grow(sb, 0); | ||
51 | sb->buf[sb->len] = '\0'; | ||
52 | } | ||
53 | |||
54 | void strbuf_grow(struct strbuf *sb, size_t extra) | ||
55 | { | ||
56 | if (sb->len + extra + 1 <= sb->len) | ||
57 | die("you want to use way too much memory"); | ||
58 | if (!sb->alloc) | ||
59 | sb->buf = NULL; | ||
60 | ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); | ||
61 | } | ||
62 | |||
63 | void strbuf_trim(struct strbuf *sb) | ||
64 | { | ||
65 | char *b = sb->buf; | ||
66 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
67 | sb->len--; | ||
68 | while (sb->len > 0 && isspace(*b)) { | ||
69 | b++; | ||
70 | sb->len--; | ||
71 | } | ||
72 | memmove(sb->buf, b, sb->len); | ||
73 | sb->buf[sb->len] = '\0'; | ||
74 | } | ||
75 | void strbuf_rtrim(struct strbuf *sb) | ||
76 | { | ||
77 | while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) | ||
78 | sb->len--; | ||
79 | sb->buf[sb->len] = '\0'; | ||
80 | } | ||
81 | |||
82 | void strbuf_ltrim(struct strbuf *sb) | ||
83 | { | ||
84 | char *b = sb->buf; | ||
85 | while (sb->len > 0 && isspace(*b)) { | ||
86 | b++; | ||
87 | sb->len--; | ||
88 | } | ||
89 | memmove(sb->buf, b, sb->len); | ||
90 | sb->buf[sb->len] = '\0'; | ||
91 | } | ||
92 | |||
93 | void strbuf_tolower(struct strbuf *sb) | ||
94 | { | ||
95 | int i; | ||
96 | for (i = 0; i < sb->len; i++) | ||
97 | sb->buf[i] = tolower(sb->buf[i]); | ||
98 | } | ||
99 | |||
100 | struct strbuf **strbuf_split(const struct strbuf *sb, int delim) | ||
101 | { | ||
102 | int alloc = 2, pos = 0; | ||
103 | char *n, *p; | ||
104 | struct strbuf **ret; | ||
105 | struct strbuf *t; | ||
106 | |||
107 | ret = calloc(alloc, sizeof(struct strbuf *)); | ||
108 | p = n = sb->buf; | ||
109 | while (n < sb->buf + sb->len) { | ||
110 | int len; | ||
111 | n = memchr(n, delim, sb->len - (n - sb->buf)); | ||
112 | if (pos + 1 >= alloc) { | ||
113 | alloc = alloc * 2; | ||
114 | ret = realloc(ret, sizeof(struct strbuf *) * alloc); | ||
115 | } | ||
116 | if (!n) | ||
117 | n = sb->buf + sb->len - 1; | ||
118 | len = n - p + 1; | ||
119 | t = malloc(sizeof(struct strbuf)); | ||
120 | strbuf_init(t, len); | ||
121 | strbuf_add(t, p, len); | ||
122 | ret[pos] = t; | ||
123 | ret[++pos] = NULL; | ||
124 | p = ++n; | ||
125 | } | ||
126 | return ret; | ||
127 | } | ||
128 | |||
/* Release every strbuf in the NULL-terminated array sbs, then the array itself. */
void strbuf_list_free(struct strbuf **sbs)
{
	struct strbuf **cur;

	for (cur = sbs; *cur; cur++) {
		strbuf_release(*cur);
		free(*cur);
	}
	free(sbs);
}
139 | |||
140 | int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) | ||
141 | { | ||
142 | int len = a->len < b->len ? a->len: b->len; | ||
143 | int cmp = memcmp(a->buf, b->buf, len); | ||
144 | if (cmp) | ||
145 | return cmp; | ||
146 | return a->len < b->len ? -1: a->len != b->len; | ||
147 | } | ||
148 | |||
149 | void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, | ||
150 | const void *data, size_t dlen) | ||
151 | { | ||
152 | if (pos + len < pos) | ||
153 | die("you want to use way too much memory"); | ||
154 | if (pos > sb->len) | ||
155 | die("`pos' is too far after the end of the buffer"); | ||
156 | if (pos + len > sb->len) | ||
157 | die("`pos + len' is too far after the end of the buffer"); | ||
158 | |||
159 | if (dlen >= len) | ||
160 | strbuf_grow(sb, dlen - len); | ||
161 | memmove(sb->buf + pos + dlen, | ||
162 | sb->buf + pos + len, | ||
163 | sb->len - pos - len); | ||
164 | memcpy(sb->buf + pos, data, dlen); | ||
165 | strbuf_setlen(sb, sb->len + dlen - len); | ||
166 | } | ||
167 | |||
/* Insert len bytes from data at offset pos; implemented as a splice that replaces nothing. */
void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len)
{
	const size_t replaced = 0;

	strbuf_splice(sb, pos, replaced, data, len);
}
172 | |||
/* Delete len bytes starting at offset pos; implemented as a splice with no replacement data. */
void strbuf_remove(struct strbuf *sb, size_t pos, size_t len)
{
	const void *nothing = NULL;

	strbuf_splice(sb, pos, len, nothing, 0);
}
177 | |||
178 | void strbuf_add(struct strbuf *sb, const void *data, size_t len) | ||
179 | { | ||
180 | strbuf_grow(sb, len); | ||
181 | memcpy(sb->buf + sb->len, data, len); | ||
182 | strbuf_setlen(sb, sb->len + len); | ||
183 | } | ||
184 | |||
185 | void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) | ||
186 | { | ||
187 | strbuf_grow(sb, len); | ||
188 | memcpy(sb->buf + sb->len, sb->buf + pos, len); | ||
189 | strbuf_setlen(sb, sb->len + len); | ||
190 | } | ||
191 | |||
192 | void strbuf_addf(struct strbuf *sb, const char *fmt, ...) | ||
193 | { | ||
194 | int len; | ||
195 | va_list ap; | ||
196 | |||
197 | if (!strbuf_avail(sb)) | ||
198 | strbuf_grow(sb, 64); | ||
199 | va_start(ap, fmt); | ||
200 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
201 | va_end(ap); | ||
202 | if (len < 0) | ||
203 | die("your vsnprintf is broken"); | ||
204 | if (len > strbuf_avail(sb)) { | ||
205 | strbuf_grow(sb, len); | ||
206 | va_start(ap, fmt); | ||
207 | len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); | ||
208 | va_end(ap); | ||
209 | if (len > strbuf_avail(sb)) { | ||
210 | die("this should not happen, your snprintf is broken"); | ||
211 | } | ||
212 | } | ||
213 | strbuf_setlen(sb, sb->len + len); | ||
214 | } | ||
215 | |||
216 | void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, | ||
217 | void *context) | ||
218 | { | ||
219 | for (;;) { | ||
220 | const char *percent; | ||
221 | size_t consumed; | ||
222 | |||
223 | percent = strchrnul(format, '%'); | ||
224 | strbuf_add(sb, format, percent - format); | ||
225 | if (!*percent) | ||
226 | break; | ||
227 | format = percent + 1; | ||
228 | |||
229 | consumed = fn(sb, format, context); | ||
230 | if (consumed) | ||
231 | format += consumed; | ||
232 | else | ||
233 | strbuf_addch(sb, '%'); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, | ||
238 | void *context) | ||
239 | { | ||
240 | struct strbuf_expand_dict_entry *e = context; | ||
241 | size_t len; | ||
242 | |||
243 | for (; e->placeholder && (len = strlen(e->placeholder)); e++) { | ||
244 | if (!strncmp(placeholder, e->placeholder, len)) { | ||
245 | if (e->value) | ||
246 | strbuf_addstr(sb, e->value); | ||
247 | return len; | ||
248 | } | ||
249 | } | ||
250 | return 0; | ||
251 | } | ||
252 | |||
253 | size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) | ||
254 | { | ||
255 | size_t res; | ||
256 | size_t oldalloc = sb->alloc; | ||
257 | |||
258 | strbuf_grow(sb, size); | ||
259 | res = fread(sb->buf + sb->len, 1, size, f); | ||
260 | if (res > 0) | ||
261 | strbuf_setlen(sb, sb->len + res); | ||
262 | else if (res < 0 && oldalloc == 0) | ||
263 | strbuf_release(sb); | ||
264 | return res; | ||
265 | } | ||
266 | |||
267 | ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) | ||
268 | { | ||
269 | size_t oldlen = sb->len; | ||
270 | size_t oldalloc = sb->alloc; | ||
271 | |||
272 | strbuf_grow(sb, hint ? hint : 8192); | ||
273 | for (;;) { | ||
274 | ssize_t cnt; | ||
275 | |||
276 | cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); | ||
277 | if (cnt < 0) { | ||
278 | if (oldalloc == 0) | ||
279 | strbuf_release(sb); | ||
280 | else | ||
281 | strbuf_setlen(sb, oldlen); | ||
282 | return -1; | ||
283 | } | ||
284 | if (!cnt) | ||
285 | break; | ||
286 | sb->len += cnt; | ||
287 | strbuf_grow(sb, 8192); | ||
288 | } | ||
289 | |||
290 | sb->buf[sb->len] = '\0'; | ||
291 | return sb->len - oldlen; | ||
292 | } | ||
293 | |||
294 | #define STRBUF_MAXLINK (2*PATH_MAX) | ||
295 | |||
296 | int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) | ||
297 | { | ||
298 | size_t oldalloc = sb->alloc; | ||
299 | |||
300 | if (hint < 32) | ||
301 | hint = 32; | ||
302 | |||
303 | while (hint < STRBUF_MAXLINK) { | ||
304 | int len; | ||
305 | |||
306 | strbuf_grow(sb, hint); | ||
307 | len = readlink(path, sb->buf, hint); | ||
308 | if (len < 0) { | ||
309 | if (errno != ERANGE) | ||
310 | break; | ||
311 | } else if (len < hint) { | ||
312 | strbuf_setlen(sb, len); | ||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | /* .. the buffer was too small - try again */ | ||
317 | hint *= 2; | ||
318 | } | ||
319 | if (oldalloc == 0) | ||
320 | strbuf_release(sb); | ||
321 | return -1; | ||
322 | } | ||
323 | |||
324 | int strbuf_getline(struct strbuf *sb, FILE *fp, int term) | ||
325 | { | ||
326 | int ch; | ||
327 | |||
328 | strbuf_grow(sb, 0); | ||
329 | if (feof(fp)) | ||
330 | return EOF; | ||
331 | |||
332 | strbuf_reset(sb); | ||
333 | while ((ch = fgetc(fp)) != EOF) { | ||
334 | if (ch == term) | ||
335 | break; | ||
336 | strbuf_grow(sb, 1); | ||
337 | sb->buf[sb->len++] = ch; | ||
338 | } | ||
339 | if (ch == EOF && sb->len == 0) | ||
340 | return EOF; | ||
341 | |||
342 | sb->buf[sb->len] = '\0'; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
/*
 * Append the contents of the file at path to sb.
 * Returns the number of bytes appended, or -1 if the file cannot be opened
 * or read.
 */
int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint)
{
	int len;
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return -1;
	len = strbuf_read(sb, fd, hint);
	close(fd);

	return len < 0 ? -1 : len;
}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h new file mode 100644 index 000000000000..9ee908a3ec5d --- /dev/null +++ b/tools/perf/util/strbuf.h | |||
@@ -0,0 +1,137 @@ | |||
1 | #ifndef STRBUF_H | ||
2 | #define STRBUF_H | ||
3 | |||
4 | /* | ||
5 | * Strbufs can be used in many ways: as a byte array, or to store arbitrary | ||
6 | * long, overflow safe strings. | ||
7 | * | ||
8 | * Strbufs have some invariants that are very important to keep in mind: | ||
9 | * | ||
10 | * 1. the ->buf member is always malloc-ed, hence strbuf's can be used to | ||
11 | * build complex strings/buffers whose final size isn't easily known. | ||
12 | * | ||
13 | * It is NOT legal to copy the ->buf pointer away. | ||
14 | * `strbuf_detach' is the operation that detaches a buffer from its shell | ||
15 | * while keeping the shell valid wrt its invariants. | ||
16 | * | ||
17 | * 2. the ->buf member is a byte array that has at least ->len + 1 bytes | ||
18 | * allocated. The extra byte is used to store a '\0', allowing the ->buf | ||
19 | * member to be a valid C-string. Every strbuf function ensures this | ||
20 | * invariant is preserved. | ||
21 | * | ||
22 | * Note that it is OK to "play" with the buffer directly if you work it | ||
23 | * that way: | ||
24 | * | ||
25 | * strbuf_grow(sb, SOME_SIZE); | ||
26 | * ... Here, the memory array starting at sb->buf, and of length | ||
27 | * ... strbuf_avail(sb) is all yours, and you are sure that | ||
28 | * ... strbuf_avail(sb) is at least SOME_SIZE. | ||
29 | * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); | ||
30 | * | ||
31 | * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). | ||
32 | * | ||
33 | * Doing so is safe, though if it has to be done in many places, adding the | ||
34 | * missing API to the strbuf module is the way to go. | ||
35 | * | ||
36 | * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 | ||
37 | * even if it's true in the current implementation. Alloc is somehow a | ||
38 | * "private" member that should not be messed with. | ||
39 | */ | ||
40 | |||
41 | #include <assert.h> | ||
42 | |||
43 | extern char strbuf_slopbuf[]; | ||
/* A growable, always NUL-terminated byte buffer. */
44 | struct strbuf { | ||
/* size of the allocation behind buf; 0 while buf still points at strbuf_slopbuf */
45 | size_t alloc; | ||
/* number of bytes in use, not counting the terminating NUL */
46 | size_t len; | ||
47 | char *buf; | ||
48 | }; | ||
49 | |||
50 | #define STRBUF_INIT { 0, 0, strbuf_slopbuf } | ||
51 | |||
52 | /*----- strbuf life cycle -----*/ | ||
53 | extern void strbuf_init(struct strbuf *, size_t); | ||
54 | extern void strbuf_release(struct strbuf *); | ||
55 | extern char *strbuf_detach(struct strbuf *, size_t *); | ||
56 | extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); | ||
57 | static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { | ||
58 | struct strbuf tmp = *a; | ||
59 | *a = *b; | ||
60 | *b = tmp; | ||
61 | } | ||
62 | |||
63 | /*----- strbuf size related -----*/ | ||
64 | static inline size_t strbuf_avail(const struct strbuf *sb) { | ||
65 | return sb->alloc ? sb->alloc - sb->len - 1 : 0; | ||
66 | } | ||
67 | |||
68 | extern void strbuf_grow(struct strbuf *, size_t); | ||
69 | |||
70 | static inline void strbuf_setlen(struct strbuf *sb, size_t len) { | ||
71 | if (!sb->alloc) | ||
72 | strbuf_grow(sb, 0); | ||
73 | assert(len < sb->alloc); | ||
74 | sb->len = len; | ||
75 | sb->buf[len] = '\0'; | ||
76 | } | ||
77 | #define strbuf_reset(sb) strbuf_setlen(sb, 0) | ||
78 | |||
79 | /*----- content related -----*/ | ||
80 | extern void strbuf_trim(struct strbuf *); | ||
81 | extern void strbuf_rtrim(struct strbuf *); | ||
82 | extern void strbuf_ltrim(struct strbuf *); | ||
83 | extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); | ||
84 | extern void strbuf_tolower(struct strbuf *); | ||
85 | |||
86 | extern struct strbuf **strbuf_split(const struct strbuf *, int delim); | ||
87 | extern void strbuf_list_free(struct strbuf **); | ||
88 | |||
89 | /*----- add data in your buffer -----*/ | ||
90 | static inline void strbuf_addch(struct strbuf *sb, int c) { | ||
91 | strbuf_grow(sb, 1); | ||
92 | sb->buf[sb->len++] = c; | ||
93 | sb->buf[sb->len] = '\0'; | ||
94 | } | ||
95 | |||
96 | extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); | ||
97 | extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); | ||
98 | |||
99 | /* splice pos..pos+len with given data */ | ||
100 | extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, | ||
101 | const void *, size_t); | ||
102 | |||
103 | extern void strbuf_add(struct strbuf *, const void *, size_t); | ||
/* Append the NUL-terminated string s (without its terminator) to sb. */
static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
	size_t n = strlen(s);

	strbuf_add(sb, s, n);
}
107 | static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { | ||
108 | strbuf_add(sb, sb2->buf, sb2->len); | ||
109 | } | ||
110 | extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); | ||
111 | |||
112 | typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); | ||
113 | extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); | ||
/*
 * One entry of the table consumed by strbuf_expand_dict_cb().  The table
 * ends at the first entry whose placeholder is NULL or empty.
 */
114 | struct strbuf_expand_dict_entry { | ||
/* text expected right after the '%' in the format */
115 | const char *placeholder; | ||
/* replacement text; NULL expands to nothing */
116 | const char *value; | ||
117 | }; | ||
118 | extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); | ||
119 | |||
120 | __attribute__((format(printf,2,3))) | ||
121 | extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); | ||
122 | |||
123 | extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); | ||
124 | /* XXX: if read fails, any partial read is undone */ | ||
125 | extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); | ||
126 | extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); | ||
127 | extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); | ||
128 | |||
129 | extern int strbuf_getline(struct strbuf *, FILE *, int); | ||
130 | |||
131 | extern void stripspace(struct strbuf *buf, int skip_comments); | ||
132 | extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); | ||
133 | |||
134 | extern int strbuf_branchname(struct strbuf *sb, const char *name); | ||
135 | extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); | ||
136 | |||
137 | #endif /* STRBUF_H */ | ||
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c new file mode 100644 index 000000000000..ec33c0c7f4e2 --- /dev/null +++ b/tools/perf/util/string.c | |||
@@ -0,0 +1,34 @@ | |||
1 | #include "string.h" | ||
2 | |||
/*
 * Convert one ASCII hexadecimal digit (either case) to its value, 0..15.
 * Returns -1 when ch is not a hexadecimal digit.
 */
static int hex(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');

	return value;
}
13 | |||
/*
 * Parse the leading run of hexadecimal digits at ptr into *long_val.
 *
 * Parsing stops at the first character that is not a hex digit (or at the
 * terminating NUL).  *long_val is 0 when no digit was consumed.  Digits
 * beyond the 16th shift the oldest ones out of the 64-bit value.
 *
 * Returns the number of characters consumed.
 */
int hex2u64(const char *ptr, __u64 *long_val)
{
	const char *cursor;
	__u64 acc = 0;

	for (cursor = ptr; *cursor != '\0'; cursor++) {
		const int digit = hex(*cursor);

		if (digit < 0)
			break;

		acc = (acc << 4) | digit;
	}

	*long_val = acc;

	return cursor - ptr;
}
diff --git a/tools/perf/util/string.h b/tools/perf/util/string.h new file mode 100644 index 000000000000..72812c1c9a7a --- /dev/null +++ b/tools/perf/util/string.h | |||
@@ -0,0 +1,8 @@ | |||
1 | #ifndef _PERF_STRING_H_ | ||
2 | #define _PERF_STRING_H_ | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | int hex2u64(const char *ptr, __u64 *val); | ||
7 | |||
8 | #endif | ||
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c new file mode 100644 index 000000000000..49a55f813712 --- /dev/null +++ b/tools/perf/util/symbol.c | |||
@@ -0,0 +1,641 @@ | |||
1 | #include "util.h" | ||
2 | #include "../perf.h" | ||
3 | #include "string.h" | ||
4 | #include "symbol.h" | ||
5 | |||
6 | #include <libelf.h> | ||
7 | #include <gelf.h> | ||
8 | #include <elf.h> | ||
9 | |||
10 | const char *sym_hist_filter; | ||
11 | |||
12 | static struct symbol *symbol__new(__u64 start, __u64 len, | ||
13 | const char *name, unsigned int priv_size, | ||
14 | __u64 obj_start, int verbose) | ||
15 | { | ||
16 | size_t namelen = strlen(name) + 1; | ||
17 | struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); | ||
18 | |||
19 | if (!self) | ||
20 | return NULL; | ||
21 | |||
22 | if (verbose >= 2) | ||
23 | printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", | ||
24 | (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); | ||
25 | |||
26 | self->obj_start= obj_start; | ||
27 | self->hist = NULL; | ||
28 | self->hist_sum = 0; | ||
29 | |||
30 | if (sym_hist_filter && !strcmp(name, sym_hist_filter)) | ||
31 | self->hist = calloc(sizeof(__u64), len); | ||
32 | |||
33 | if (priv_size) { | ||
34 | memset(self, 0, priv_size); | ||
35 | self = ((void *)self) + priv_size; | ||
36 | } | ||
37 | self->start = start; | ||
38 | self->end = start + len - 1; | ||
39 | memcpy(self->name, name, namelen); | ||
40 | |||
41 | return self; | ||
42 | } | ||
43 | |||
44 | static void symbol__delete(struct symbol *self, unsigned int priv_size) | ||
45 | { | ||
46 | free(((void *)self) - priv_size); | ||
47 | } | ||
48 | |||
49 | static size_t symbol__fprintf(struct symbol *self, FILE *fp) | ||
50 | { | ||
51 | return fprintf(fp, " %llx-%llx %s\n", | ||
52 | self->start, self->end, self->name); | ||
53 | } | ||
54 | |||
55 | struct dso *dso__new(const char *name, unsigned int sym_priv_size) | ||
56 | { | ||
57 | struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); | ||
58 | |||
59 | if (self != NULL) { | ||
60 | strcpy(self->name, name); | ||
61 | self->syms = RB_ROOT; | ||
62 | self->sym_priv_size = sym_priv_size; | ||
63 | self->find_symbol = dso__find_symbol; | ||
64 | } | ||
65 | |||
66 | return self; | ||
67 | } | ||
68 | |||
69 | static void dso__delete_symbols(struct dso *self) | ||
70 | { | ||
71 | struct symbol *pos; | ||
72 | struct rb_node *next = rb_first(&self->syms); | ||
73 | |||
74 | while (next) { | ||
75 | pos = rb_entry(next, struct symbol, rb_node); | ||
76 | next = rb_next(&pos->rb_node); | ||
77 | rb_erase(&pos->rb_node, &self->syms); | ||
78 | symbol__delete(pos, self->sym_priv_size); | ||
79 | } | ||
80 | } | ||
81 | |||
/*
 * Free a dso and all of its symbols.  A NULL self is ignored, mirroring
 * free() and the NULL tolerance of dso__find_symbol().
 */
void dso__delete(struct dso *self)
{
	if (self == NULL)
		return;

	dso__delete_symbols(self);
	free(self);
}
87 | |||
88 | static void dso__insert_symbol(struct dso *self, struct symbol *sym) | ||
89 | { | ||
90 | struct rb_node **p = &self->syms.rb_node; | ||
91 | struct rb_node *parent = NULL; | ||
92 | const __u64 ip = sym->start; | ||
93 | struct symbol *s; | ||
94 | |||
95 | while (*p != NULL) { | ||
96 | parent = *p; | ||
97 | s = rb_entry(parent, struct symbol, rb_node); | ||
98 | if (ip < s->start) | ||
99 | p = &(*p)->rb_left; | ||
100 | else | ||
101 | p = &(*p)->rb_right; | ||
102 | } | ||
103 | rb_link_node(&sym->rb_node, parent, p); | ||
104 | rb_insert_color(&sym->rb_node, &self->syms); | ||
105 | } | ||
106 | |||
107 | struct symbol *dso__find_symbol(struct dso *self, __u64 ip) | ||
108 | { | ||
109 | struct rb_node *n; | ||
110 | |||
111 | if (self == NULL) | ||
112 | return NULL; | ||
113 | |||
114 | n = self->syms.rb_node; | ||
115 | |||
116 | while (n) { | ||
117 | struct symbol *s = rb_entry(n, struct symbol, rb_node); | ||
118 | |||
119 | if (ip < s->start) | ||
120 | n = n->rb_left; | ||
121 | else if (ip > s->end) | ||
122 | n = n->rb_right; | ||
123 | else | ||
124 | return s; | ||
125 | } | ||
126 | |||
127 | return NULL; | ||
128 | } | ||
129 | |||
130 | size_t dso__fprintf(struct dso *self, FILE *fp) | ||
131 | { | ||
132 | size_t ret = fprintf(fp, "dso: %s\n", self->name); | ||
133 | |||
134 | struct rb_node *nd; | ||
135 | for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { | ||
136 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); | ||
137 | ret += symbol__fprintf(pos, fp); | ||
138 | } | ||
139 | |||
140 | return ret; | ||
141 | } | ||
142 | |||
143 | static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose) | ||
144 | { | ||
145 | struct rb_node *nd, *prevnd; | ||
146 | char *line = NULL; | ||
147 | size_t n; | ||
148 | FILE *file = fopen("/proc/kallsyms", "r"); | ||
149 | |||
150 | if (file == NULL) | ||
151 | goto out_failure; | ||
152 | |||
153 | while (!feof(file)) { | ||
154 | __u64 start; | ||
155 | struct symbol *sym; | ||
156 | int line_len, len; | ||
157 | char symbol_type; | ||
158 | |||
159 | line_len = getline(&line, &n, file); | ||
160 | if (line_len < 0) | ||
161 | break; | ||
162 | |||
163 | if (!line) | ||
164 | goto out_failure; | ||
165 | |||
166 | line[--line_len] = '\0'; /* \n */ | ||
167 | |||
168 | len = hex2u64(line, &start); | ||
169 | |||
170 | len++; | ||
171 | if (len + 2 >= line_len) | ||
172 | continue; | ||
173 | |||
174 | symbol_type = toupper(line[len]); | ||
175 | /* | ||
176 | * We're interested only in code ('T'ext) | ||
177 | */ | ||
178 | if (symbol_type != 'T' && symbol_type != 'W') | ||
179 | continue; | ||
180 | /* | ||
181 | * Well fix up the end later, when we have all sorted. | ||
182 | */ | ||
183 | sym = symbol__new(start, 0xdead, line + len + 2, | ||
184 | self->sym_priv_size, 0, verbose); | ||
185 | |||
186 | if (sym == NULL) | ||
187 | goto out_delete_line; | ||
188 | |||
189 | if (filter && filter(self, sym)) | ||
190 | symbol__delete(sym, self->sym_priv_size); | ||
191 | else | ||
192 | dso__insert_symbol(self, sym); | ||
193 | } | ||
194 | |||
195 | /* | ||
196 | * Now that we have all sorted out, just set the ->end of all | ||
197 | * symbols | ||
198 | */ | ||
199 | prevnd = rb_first(&self->syms); | ||
200 | |||
201 | if (prevnd == NULL) | ||
202 | goto out_delete_line; | ||
203 | |||
204 | for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { | ||
205 | struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), | ||
206 | *curr = rb_entry(nd, struct symbol, rb_node); | ||
207 | |||
208 | prev->end = curr->start - 1; | ||
209 | prevnd = nd; | ||
210 | } | ||
211 | |||
212 | free(line); | ||
213 | fclose(file); | ||
214 | |||
215 | return 0; | ||
216 | |||
217 | out_delete_line: | ||
218 | free(line); | ||
219 | out_failure: | ||
220 | return -1; | ||
221 | } | ||
222 | |||
223 | static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose) | ||
224 | { | ||
225 | char *line = NULL; | ||
226 | size_t n; | ||
227 | FILE *file; | ||
228 | int nr_syms = 0; | ||
229 | |||
230 | file = fopen(self->name, "r"); | ||
231 | if (file == NULL) | ||
232 | goto out_failure; | ||
233 | |||
234 | while (!feof(file)) { | ||
235 | __u64 start, size; | ||
236 | struct symbol *sym; | ||
237 | int line_len, len; | ||
238 | |||
239 | line_len = getline(&line, &n, file); | ||
240 | if (line_len < 0) | ||
241 | break; | ||
242 | |||
243 | if (!line) | ||
244 | goto out_failure; | ||
245 | |||
246 | line[--line_len] = '\0'; /* \n */ | ||
247 | |||
248 | len = hex2u64(line, &start); | ||
249 | |||
250 | len++; | ||
251 | if (len + 2 >= line_len) | ||
252 | continue; | ||
253 | |||
254 | len += hex2u64(line + len, &size); | ||
255 | |||
256 | len++; | ||
257 | if (len + 2 >= line_len) | ||
258 | continue; | ||
259 | |||
260 | sym = symbol__new(start, size, line + len, | ||
261 | self->sym_priv_size, start, verbose); | ||
262 | |||
263 | if (sym == NULL) | ||
264 | goto out_delete_line; | ||
265 | |||
266 | if (filter && filter(self, sym)) | ||
267 | symbol__delete(sym, self->sym_priv_size); | ||
268 | else { | ||
269 | dso__insert_symbol(self, sym); | ||
270 | nr_syms++; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | free(line); | ||
275 | fclose(file); | ||
276 | |||
277 | return nr_syms; | ||
278 | |||
279 | out_delete_line: | ||
280 | free(line); | ||
281 | out_failure: | ||
282 | return -1; | ||
283 | } | ||
284 | |||
/**
 * elf_symtab__for_each_symbol - iterate thru all the symbols
 *
 * @syms: Elf_Data of the symbol table to iterate
 * @nr_syms: number of entries in @syms
 * @index: uint32_t index
 * @sym: GElf_Sym iterator, filled in by gelf_getsym() for each @index
 */
#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \
	for (index = 0, gelf_getsym(syms, index, &sym);\
	     index < (nr_syms); \
	     index++, gelf_getsym(syms, index, &sym))
296 | |||
297 | static inline uint8_t elf_sym__type(const GElf_Sym *sym) | ||
298 | { | ||
299 | return GELF_ST_TYPE(sym->st_info); | ||
300 | } | ||
301 | |||
302 | static inline int elf_sym__is_function(const GElf_Sym *sym) | ||
303 | { | ||
304 | return elf_sym__type(sym) == STT_FUNC && | ||
305 | sym->st_name != 0 && | ||
306 | sym->st_shndx != SHN_UNDEF && | ||
307 | sym->st_size != 0; | ||
308 | } | ||
309 | |||
310 | static inline const char *elf_sym__name(const GElf_Sym *sym, | ||
311 | const Elf_Data *symstrs) | ||
312 | { | ||
313 | return symstrs->d_buf + sym->st_name; | ||
314 | } | ||
315 | |||
316 | static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, | ||
317 | GElf_Shdr *shp, const char *name, | ||
318 | size_t *index) | ||
319 | { | ||
320 | Elf_Scn *sec = NULL; | ||
321 | size_t cnt = 1; | ||
322 | |||
323 | while ((sec = elf_nextscn(elf, sec)) != NULL) { | ||
324 | char *str; | ||
325 | |||
326 | gelf_getshdr(sec, shp); | ||
327 | str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); | ||
328 | if (!strcmp(name, str)) { | ||
329 | if (index) | ||
330 | *index = cnt; | ||
331 | break; | ||
332 | } | ||
333 | ++cnt; | ||
334 | } | ||
335 | |||
336 | return sec; | ||
337 | } | ||
338 | |||
339 | #define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ | ||
340 | for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ | ||
341 | idx < nr_entries; \ | ||
342 | ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) | ||
343 | |||
344 | #define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ | ||
345 | for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \ | ||
346 | idx < nr_entries; \ | ||
347 | ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) | ||
348 | |||
349 | static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, | ||
350 | GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym, | ||
351 | GElf_Shdr *shdr_dynsym, | ||
352 | size_t dynsym_idx, int verbose) | ||
353 | { | ||
354 | uint32_t nr_rel_entries, idx; | ||
355 | GElf_Sym sym; | ||
356 | __u64 plt_offset; | ||
357 | GElf_Shdr shdr_plt; | ||
358 | struct symbol *f; | ||
359 | GElf_Shdr shdr_rel_plt; | ||
360 | Elf_Data *reldata, *syms, *symstrs; | ||
361 | Elf_Scn *scn_plt_rel, *scn_symstrs; | ||
362 | char sympltname[1024]; | ||
363 | int nr = 0, symidx; | ||
364 | |||
365 | scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, | ||
366 | ".rela.plt", NULL); | ||
367 | if (scn_plt_rel == NULL) { | ||
368 | scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, | ||
369 | ".rel.plt", NULL); | ||
370 | if (scn_plt_rel == NULL) | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | if (shdr_rel_plt.sh_link != dynsym_idx) | ||
375 | return 0; | ||
376 | |||
377 | if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL) | ||
378 | return 0; | ||
379 | |||
380 | /* | ||
381 | * Fetch the relocation section to find the indexes to the GOT | ||
382 | * and the symbols in the .dynsym they refer to. | ||
383 | */ | ||
384 | reldata = elf_getdata(scn_plt_rel, NULL); | ||
385 | if (reldata == NULL) | ||
386 | return -1; | ||
387 | |||
388 | syms = elf_getdata(scn_dynsym, NULL); | ||
389 | if (syms == NULL) | ||
390 | return -1; | ||
391 | |||
392 | scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link); | ||
393 | if (scn_symstrs == NULL) | ||
394 | return -1; | ||
395 | |||
396 | symstrs = elf_getdata(scn_symstrs, NULL); | ||
397 | if (symstrs == NULL) | ||
398 | return -1; | ||
399 | |||
400 | nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; | ||
401 | plt_offset = shdr_plt.sh_offset; | ||
402 | |||
403 | if (shdr_rel_plt.sh_type == SHT_RELA) { | ||
404 | GElf_Rela pos_mem, *pos; | ||
405 | |||
406 | elf_section__for_each_rela(reldata, pos, pos_mem, idx, | ||
407 | nr_rel_entries) { | ||
408 | symidx = GELF_R_SYM(pos->r_info); | ||
409 | plt_offset += shdr_plt.sh_entsize; | ||
410 | gelf_getsym(syms, symidx, &sym); | ||
411 | snprintf(sympltname, sizeof(sympltname), | ||
412 | "%s@plt", elf_sym__name(&sym, symstrs)); | ||
413 | |||
414 | f = symbol__new(plt_offset, shdr_plt.sh_entsize, | ||
415 | sympltname, self->sym_priv_size, 0, verbose); | ||
416 | if (!f) | ||
417 | return -1; | ||
418 | |||
419 | dso__insert_symbol(self, f); | ||
420 | ++nr; | ||
421 | } | ||
422 | } else if (shdr_rel_plt.sh_type == SHT_REL) { | ||
423 | GElf_Rel pos_mem, *pos; | ||
424 | elf_section__for_each_rel(reldata, pos, pos_mem, idx, | ||
425 | nr_rel_entries) { | ||
426 | symidx = GELF_R_SYM(pos->r_info); | ||
427 | plt_offset += shdr_plt.sh_entsize; | ||
428 | gelf_getsym(syms, symidx, &sym); | ||
429 | snprintf(sympltname, sizeof(sympltname), | ||
430 | "%s@plt", elf_sym__name(&sym, symstrs)); | ||
431 | |||
432 | f = symbol__new(plt_offset, shdr_plt.sh_entsize, | ||
433 | sympltname, self->sym_priv_size, 0, verbose); | ||
434 | if (!f) | ||
435 | return -1; | ||
436 | |||
437 | dso__insert_symbol(self, f); | ||
438 | ++nr; | ||
439 | } | ||
440 | } else { | ||
441 | /* | ||
442 | * TODO: There are still one more shdr_rel_plt.sh_type | ||
443 | * I have to investigate, but probably should be ignored. | ||
444 | */ | ||
445 | } | ||
446 | |||
447 | return nr; | ||
448 | } | ||
449 | |||
450 | static int dso__load_sym(struct dso *self, int fd, const char *name, | ||
451 | symbol_filter_t filter, int verbose) | ||
452 | { | ||
453 | Elf_Data *symstrs; | ||
454 | uint32_t nr_syms; | ||
455 | int err = -1; | ||
456 | uint32_t index; | ||
457 | GElf_Ehdr ehdr; | ||
458 | GElf_Shdr shdr; | ||
459 | Elf_Data *syms; | ||
460 | GElf_Sym sym; | ||
461 | Elf_Scn *sec, *sec_dynsym; | ||
462 | Elf *elf; | ||
463 | size_t dynsym_idx; | ||
464 | int nr = 0; | ||
465 | |||
466 | elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); | ||
467 | if (elf == NULL) { | ||
468 | if (verbose) | ||
469 | fprintf(stderr, "%s: cannot read %s ELF file.\n", | ||
470 | __func__, name); | ||
471 | goto out_close; | ||
472 | } | ||
473 | |||
474 | if (gelf_getehdr(elf, &ehdr) == NULL) { | ||
475 | if (verbose) | ||
476 | fprintf(stderr, "%s: cannot get elf header.\n", __func__); | ||
477 | goto out_elf_end; | ||
478 | } | ||
479 | |||
480 | /* | ||
481 | * We need to check if we have a .dynsym, so that we can handle the | ||
482 | * .plt, synthesizing its symbols, that aren't on the symtabs (be it | ||
483 | * .dynsym or .symtab) | ||
484 | */ | ||
485 | sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr, | ||
486 | ".dynsym", &dynsym_idx); | ||
487 | if (sec_dynsym != NULL) { | ||
488 | nr = dso__synthesize_plt_symbols(self, elf, &ehdr, | ||
489 | sec_dynsym, &shdr, | ||
490 | dynsym_idx, verbose); | ||
491 | if (nr < 0) | ||
492 | goto out_elf_end; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * But if we have a full .symtab (that is a superset of .dynsym) we | ||
497 | * should add the symbols not in the .dynsyn | ||
498 | */ | ||
499 | sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); | ||
500 | if (sec == NULL) { | ||
501 | if (sec_dynsym == NULL) | ||
502 | goto out_elf_end; | ||
503 | |||
504 | sec = sec_dynsym; | ||
505 | gelf_getshdr(sec, &shdr); | ||
506 | } | ||
507 | |||
508 | syms = elf_getdata(sec, NULL); | ||
509 | if (syms == NULL) | ||
510 | goto out_elf_end; | ||
511 | |||
512 | sec = elf_getscn(elf, shdr.sh_link); | ||
513 | if (sec == NULL) | ||
514 | goto out_elf_end; | ||
515 | |||
516 | symstrs = elf_getdata(sec, NULL); | ||
517 | if (symstrs == NULL) | ||
518 | goto out_elf_end; | ||
519 | |||
520 | nr_syms = shdr.sh_size / shdr.sh_entsize; | ||
521 | |||
522 | memset(&sym, 0, sizeof(sym)); | ||
523 | |||
524 | elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { | ||
525 | struct symbol *f; | ||
526 | __u64 obj_start; | ||
527 | |||
528 | if (!elf_sym__is_function(&sym)) | ||
529 | continue; | ||
530 | |||
531 | sec = elf_getscn(elf, sym.st_shndx); | ||
532 | if (!sec) | ||
533 | goto out_elf_end; | ||
534 | |||
535 | gelf_getshdr(sec, &shdr); | ||
536 | obj_start = sym.st_value; | ||
537 | |||
538 | sym.st_value -= shdr.sh_addr - shdr.sh_offset; | ||
539 | |||
540 | f = symbol__new(sym.st_value, sym.st_size, | ||
541 | elf_sym__name(&sym, symstrs), | ||
542 | self->sym_priv_size, obj_start, verbose); | ||
543 | if (!f) | ||
544 | goto out_elf_end; | ||
545 | |||
546 | if (filter && filter(self, f)) | ||
547 | symbol__delete(f, self->sym_priv_size); | ||
548 | else { | ||
549 | dso__insert_symbol(self, f); | ||
550 | nr++; | ||
551 | } | ||
552 | } | ||
553 | |||
554 | err = nr; | ||
555 | out_elf_end: | ||
556 | elf_end(elf); | ||
557 | out_close: | ||
558 | return err; | ||
559 | } | ||
560 | |||
561 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose) | ||
562 | { | ||
563 | int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug"); | ||
564 | char *name = malloc(size); | ||
565 | int variant = 0; | ||
566 | int ret = -1; | ||
567 | int fd; | ||
568 | |||
569 | if (!name) | ||
570 | return -1; | ||
571 | |||
572 | if (strncmp(self->name, "/tmp/perf-", 10) == 0) | ||
573 | return dso__load_perf_map(self, filter, verbose); | ||
574 | |||
575 | more: | ||
576 | do { | ||
577 | switch (variant) { | ||
578 | case 0: /* Fedora */ | ||
579 | snprintf(name, size, "/usr/lib/debug%s.debug", self->name); | ||
580 | break; | ||
581 | case 1: /* Ubuntu */ | ||
582 | snprintf(name, size, "/usr/lib/debug%s", self->name); | ||
583 | break; | ||
584 | case 2: /* Sane people */ | ||
585 | snprintf(name, size, "%s", self->name); | ||
586 | break; | ||
587 | |||
588 | default: | ||
589 | goto out; | ||
590 | } | ||
591 | variant++; | ||
592 | |||
593 | fd = open(name, O_RDONLY); | ||
594 | } while (fd < 0); | ||
595 | |||
596 | ret = dso__load_sym(self, fd, name, filter, verbose); | ||
597 | close(fd); | ||
598 | |||
599 | /* | ||
600 | * Some people seem to have debuginfo files _WITHOUT_ debug info!?!? | ||
601 | */ | ||
602 | if (!ret) | ||
603 | goto more; | ||
604 | |||
605 | out: | ||
606 | free(name); | ||
607 | return ret; | ||
608 | } | ||
609 | |||
610 | static int dso__load_vmlinux(struct dso *self, const char *vmlinux, | ||
611 | symbol_filter_t filter, int verbose) | ||
612 | { | ||
613 | int err, fd = open(vmlinux, O_RDONLY); | ||
614 | |||
615 | if (fd < 0) | ||
616 | return -1; | ||
617 | |||
618 | err = dso__load_sym(self, fd, vmlinux, filter, verbose); | ||
619 | close(fd); | ||
620 | |||
621 | return err; | ||
622 | } | ||
623 | |||
624 | int dso__load_kernel(struct dso *self, const char *vmlinux, | ||
625 | symbol_filter_t filter, int verbose) | ||
626 | { | ||
627 | int err = -1; | ||
628 | |||
629 | if (vmlinux) | ||
630 | err = dso__load_vmlinux(self, vmlinux, filter, verbose); | ||
631 | |||
632 | if (err) | ||
633 | err = dso__load_kallsyms(self, filter, verbose); | ||
634 | |||
635 | return err; | ||
636 | } | ||
637 | |||
/* One-time setup: tell libelf which ELF version this code was built for. */
void symbol__init(void)
{
	(void)elf_version(EV_CURRENT);
}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h new file mode 100644 index 000000000000..0d1292bd8270 --- /dev/null +++ b/tools/perf/util/symbol.h | |||
@@ -0,0 +1,47 @@ | |||
1 | #ifndef _PERF_SYMBOL_ | ||
2 | #define _PERF_SYMBOL_ 1 | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include "list.h" | ||
6 | #include "rbtree.h" | ||
7 | |||
8 | struct symbol { | ||
9 | struct rb_node rb_node; | ||
10 | __u64 start; | ||
11 | __u64 end; | ||
12 | __u64 obj_start; | ||
13 | __u64 hist_sum; | ||
14 | __u64 *hist; | ||
15 | char name[0]; | ||
16 | }; | ||
17 | |||
18 | struct dso { | ||
19 | struct list_head node; | ||
20 | struct rb_root syms; | ||
21 | unsigned int sym_priv_size; | ||
22 | struct symbol *(*find_symbol)(struct dso *, __u64 ip); | ||
23 | char name[0]; | ||
24 | }; | ||
25 | |||
/*
 * Name of the symbol to attach a histogram to, or NULL.  Declaration only:
 * the single definition lives in symbol.c, so including this header from
 * several files does not create one definition per file.
 */
extern const char *sym_hist_filter;
27 | |||
28 | typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); | ||
29 | |||
30 | struct dso *dso__new(const char *name, unsigned int sym_priv_size); | ||
31 | void dso__delete(struct dso *self); | ||
32 | |||
33 | static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) | ||
34 | { | ||
35 | return ((void *)sym) - self->sym_priv_size; | ||
36 | } | ||
37 | |||
38 | struct symbol *dso__find_symbol(struct dso *self, __u64 ip); | ||
39 | |||
40 | int dso__load_kernel(struct dso *self, const char *vmlinux, | ||
41 | symbol_filter_t filter, int verbose); | ||
42 | int dso__load(struct dso *self, symbol_filter_t filter, int verbose); | ||
43 | |||
44 | size_t dso__fprintf(struct dso *self, FILE *fp); | ||
45 | |||
46 | void symbol__init(void); | ||
47 | #endif /* _PERF_SYMBOL_ */ | ||
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c new file mode 100644 index 000000000000..e16bf9a707e8 --- /dev/null +++ b/tools/perf/util/usage.c | |||
@@ -0,0 +1,80 @@ | |||
1 | /* | ||
2 | * GIT - The information manager from hell | ||
3 | * | ||
4 | * Copyright (C) Linus Torvalds, 2005 | ||
5 | */ | ||
6 | #include "util.h" | ||
7 | |||
/*
 * Format err with params into a fixed 1024-byte buffer (longer messages are
 * truncated) and print it on stderr as " <prefix><message>".
 */
static void report(const char *prefix, const char *err, va_list params)
{
	char msg[1024];

	vsnprintf(msg, sizeof msg, err, params);
	fprintf(stderr, " %s%s\n", prefix, msg);
}
14 | |||
15 | static NORETURN void usage_builtin(const char *err) | ||
16 | { | ||
17 | fprintf(stderr, "\n Usage: %s\n", err); | ||
18 | exit(129); | ||
19 | } | ||
20 | |||
21 | static NORETURN void die_builtin(const char *err, va_list params) | ||
22 | { | ||
23 | report(" Fatal: ", err, params); | ||
24 | exit(128); | ||
25 | } | ||
26 | |||
/* Default error handler: report the message with an " Error: " prefix. */
static void error_builtin(const char *err, va_list params)
{
	const char *prefix = " Error: ";

	report(prefix, err, params);
}
31 | |||
/* Default warning handler: report the message with a " Warning: " prefix. */
static void warn_builtin(const char *warn, va_list params)
{
	const char *prefix = " Warning: ";

	report(prefix, warn, params);
}
36 | |||
37 | /* If we are in a dlopen()ed .so, a write to a global variable would segfault | ||
38 | * (ugh), so keep things static. */ | ||
39 | static void (*usage_routine)(const char *err) NORETURN = usage_builtin; | ||
40 | static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; | ||
41 | static void (*error_routine)(const char *err, va_list params) = error_builtin; | ||
42 | static void (*warn_routine)(const char *err, va_list params) = warn_builtin; | ||
43 | |||
44 | void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) | ||
45 | { | ||
46 | die_routine = routine; | ||
47 | } | ||
48 | |||
/* Hand the usage text err to the current usage handler. */
void usage(const char *err)
{
	usage_routine(err);
}
53 | |||
/*
 * Pass a printf-style fatal error to the current die handler, which is
 * declared as not returning.
 */
void die(const char *err, ...)
{
	va_list ap;

	va_start(ap, err);
	die_routine(err, ap);
	va_end(ap);
}
62 | |||
/*
 * Pass a printf-style error to the current error handler.
 * Always returns -1, so callers can write "return error(...)".
 */
int error(const char *err, ...)
{
	va_list ap;

	va_start(ap, err);
	error_routine(err, ap);
	va_end(ap);

	return -1;
}
72 | |||
/* Pass a printf-style warning to the current warning handler. */
void warning(const char *warn, ...)
{
	va_list ap;

	va_start(ap, warn);
	warn_routine(warn, ap);
	va_end(ap);
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h new file mode 100644 index 000000000000..76590a16c271 --- /dev/null +++ b/tools/perf/util/util.h | |||
@@ -0,0 +1,410 @@ | |||
1 | #ifndef GIT_COMPAT_UTIL_H | ||
2 | #define GIT_COMPAT_UTIL_H | ||
3 | |||
4 | #define _FILE_OFFSET_BITS 64 | ||
5 | |||
6 | #ifndef FLEX_ARRAY | ||
7 | /* | ||
8 | * See if our compiler is known to support flexible array members. | ||
9 | */ | ||
10 | #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) | ||
11 | # define FLEX_ARRAY /* empty */ | ||
12 | #elif defined(__GNUC__) | ||
13 | # if (__GNUC__ >= 3) | ||
14 | # define FLEX_ARRAY /* empty */ | ||
15 | # else | ||
16 | # define FLEX_ARRAY 0 /* older GNU extension */ | ||
17 | # endif | ||
18 | #endif | ||
19 | |||
20 | /* | ||
21 | * Otherwise, default to safer but a bit wasteful traditional style | ||
22 | */ | ||
23 | #ifndef FLEX_ARRAY | ||
24 | # define FLEX_ARRAY 1 | ||
25 | #endif | ||
26 | #endif | ||
27 | |||
/*
 * Number of elements in the array x.  x must be a real array, not a pointer.
 * The argument is parenthesised so that any array-valued expression can be
 * passed safely.
 */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
29 | |||
30 | #ifdef __GNUC__ | ||
31 | #define TYPEOF(x) (__typeof__(x)) | ||
32 | #else | ||
33 | #define TYPEOF(x) | ||
34 | #endif | ||
35 | |||
36 | #define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) | ||
37 | #define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ | ||
38 | |||
39 | /* Approximation of the length of the decimal representation of this type. */ | ||
40 | #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) | ||
41 | |||
42 | #if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) | ||
43 | #define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ | ||
44 | #define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ | ||
45 | #endif | ||
46 | #define _ALL_SOURCE 1 | ||
47 | #define _GNU_SOURCE 1 | ||
48 | #define _BSD_SOURCE 1 | ||
49 | |||
50 | #include <unistd.h> | ||
51 | #include <stdio.h> | ||
52 | #include <sys/stat.h> | ||
53 | #include <fcntl.h> | ||
54 | #include <stddef.h> | ||
55 | #include <stdlib.h> | ||
56 | #include <stdarg.h> | ||
57 | #include <string.h> | ||
58 | #include <errno.h> | ||
59 | #include <limits.h> | ||
60 | #include <sys/param.h> | ||
61 | #include <sys/types.h> | ||
62 | #include <dirent.h> | ||
63 | #include <sys/time.h> | ||
64 | #include <time.h> | ||
65 | #include <signal.h> | ||
66 | #include <fnmatch.h> | ||
67 | #include <assert.h> | ||
68 | #include <regex.h> | ||
69 | #include <utime.h> | ||
70 | #ifndef __MINGW32__ | ||
71 | #include <sys/wait.h> | ||
72 | #include <sys/poll.h> | ||
73 | #include <sys/socket.h> | ||
74 | #include <sys/ioctl.h> | ||
75 | #ifndef NO_SYS_SELECT_H | ||
76 | #include <sys/select.h> | ||
77 | #endif | ||
78 | #include <netinet/in.h> | ||
79 | #include <netinet/tcp.h> | ||
80 | #include <arpa/inet.h> | ||
81 | #include <netdb.h> | ||
82 | #include <pwd.h> | ||
83 | #include <inttypes.h> | ||
84 | #if defined(__CYGWIN__) | ||
85 | #undef _XOPEN_SOURCE | ||
86 | #include <grp.h> | ||
87 | #define _XOPEN_SOURCE 600 | ||
88 | #include "compat/cygwin.h" | ||
89 | #else | ||
90 | #undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ | ||
91 | #include <grp.h> | ||
92 | #define _ALL_SOURCE 1 | ||
93 | #endif | ||
94 | #else /* __MINGW32__ */ | ||
95 | /* pull in Windows compatibility stuff */ | ||
96 | #include "compat/mingw.h" | ||
97 | #endif /* __MINGW32__ */ | ||
98 | |||
99 | #ifndef NO_ICONV | ||
100 | #include <iconv.h> | ||
101 | #endif | ||
102 | |||
103 | #ifndef NO_OPENSSL | ||
104 | #include <openssl/ssl.h> | ||
105 | #include <openssl/err.h> | ||
106 | #endif | ||
107 | |||
108 | /* On most systems <limits.h> would have given us this, but | ||
109 | * not on some systems (e.g. GNU/Hurd). | ||
110 | */ | ||
111 | #ifndef PATH_MAX | ||
112 | #define PATH_MAX 4096 | ||
113 | #endif | ||
114 | |||
115 | #ifndef PRIuMAX | ||
116 | #define PRIuMAX "llu" | ||
117 | #endif | ||
118 | |||
119 | #ifndef PRIu32 | ||
120 | #define PRIu32 "u" | ||
121 | #endif | ||
122 | |||
123 | #ifndef PRIx32 | ||
124 | #define PRIx32 "x" | ||
125 | #endif | ||
126 | |||
127 | #ifndef PATH_SEP | ||
128 | #define PATH_SEP ':' | ||
129 | #endif | ||
130 | |||
131 | #ifndef STRIP_EXTENSION | ||
132 | #define STRIP_EXTENSION "" | ||
133 | #endif | ||
134 | |||
135 | #ifndef has_dos_drive_prefix | ||
136 | #define has_dos_drive_prefix(path) 0 | ||
137 | #endif | ||
138 | |||
139 | #ifndef is_dir_sep | ||
140 | #define is_dir_sep(c) ((c) == '/') | ||
141 | #endif | ||
142 | |||
143 | #ifdef __GNUC__ | ||
144 | #define NORETURN __attribute__((__noreturn__)) | ||
145 | #else | ||
146 | #define NORETURN | ||
147 | #ifndef __attribute__ | ||
148 | #define __attribute__(x) | ||
149 | #endif | ||
150 | #endif | ||
151 | |||
152 | /* General helper functions */ | ||
153 | extern void usage(const char *err) NORETURN; | ||
154 | extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); | ||
155 | extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
156 | extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); | ||
157 | |||
158 | extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); | ||
159 | |||
160 | extern int prefixcmp(const char *str, const char *prefix); | ||
161 | extern time_t tm_to_time_t(const struct tm *tm); | ||
162 | |||
/*
 * If str starts with prefix, return a pointer to the first character of str
 * after the prefix; otherwise return NULL.
 */
static inline const char *skip_prefix(const char *str, const char *prefix)
{
	const size_t prefix_len = strlen(prefix);

	if (strncmp(str, prefix, prefix_len) != 0)
		return NULL;

	return str + prefix_len;
}
168 | |||
169 | #if defined(NO_MMAP) || defined(USE_WIN32_MMAP) | ||
170 | |||
171 | #ifndef PROT_READ | ||
172 | #define PROT_READ 1 | ||
173 | #define PROT_WRITE 2 | ||
174 | #define MAP_PRIVATE 1 | ||
175 | #define MAP_FAILED ((void*)-1) | ||
176 | #endif | ||
177 | |||
178 | #define mmap git_mmap | ||
179 | #define munmap git_munmap | ||
180 | extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
181 | extern int git_munmap(void *start, size_t length); | ||
182 | |||
183 | #else /* NO_MMAP || USE_WIN32_MMAP */ | ||
184 | |||
185 | #include <sys/mman.h> | ||
186 | |||
187 | #endif /* NO_MMAP || USE_WIN32_MMAP */ | ||
188 | |||
189 | #ifdef NO_MMAP | ||
190 | |||
191 | /* This value must be multiple of (pagesize * 2) */ | ||
192 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) | ||
193 | |||
194 | #else /* NO_MMAP */ | ||
195 | |||
196 | /* This value must be multiple of (pagesize * 2) */ | ||
197 | #define DEFAULT_PACKED_GIT_WINDOW_SIZE \ | ||
198 | (sizeof(void*) >= 8 \ | ||
199 | ? 1 * 1024 * 1024 * 1024 \ | ||
200 | : 32 * 1024 * 1024) | ||
201 | |||
202 | #endif /* NO_MMAP */ | ||
203 | |||
204 | #ifdef NO_ST_BLOCKS_IN_STRUCT_STAT | ||
205 | #define on_disk_bytes(st) ((st).st_size) | ||
206 | #else | ||
207 | #define on_disk_bytes(st) ((st).st_blocks * 512) | ||
208 | #endif | ||
209 | |||
210 | #define DEFAULT_PACKED_GIT_LIMIT \ | ||
211 | ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) | ||
212 | |||
213 | #ifdef NO_PREAD | ||
214 | #define pread git_pread | ||
215 | extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); | ||
216 | #endif | ||
217 | /* | ||
218 | * Forward decl that will remind us if its twin in cache.h changes. | ||
219 | * This function is used in compat/pread.c. But we can't include | ||
220 | * cache.h there. | ||
221 | */ | ||
222 | extern ssize_t read_in_full(int fd, void *buf, size_t count); | ||
223 | |||
224 | #ifdef NO_SETENV | ||
225 | #define setenv gitsetenv | ||
226 | extern int gitsetenv(const char *, const char *, int); | ||
227 | #endif | ||
228 | |||
229 | #ifdef NO_MKDTEMP | ||
230 | #define mkdtemp gitmkdtemp | ||
231 | extern char *gitmkdtemp(char *); | ||
232 | #endif | ||
233 | |||
234 | #ifdef NO_UNSETENV | ||
235 | #define unsetenv gitunsetenv | ||
236 | extern void gitunsetenv(const char *); | ||
237 | #endif | ||
238 | |||
239 | #ifdef NO_STRCASESTR | ||
240 | #define strcasestr gitstrcasestr | ||
241 | extern char *gitstrcasestr(const char *haystack, const char *needle); | ||
242 | #endif | ||
243 | |||
244 | #ifdef NO_STRLCPY | ||
245 | #define strlcpy gitstrlcpy | ||
246 | extern size_t gitstrlcpy(char *, const char *, size_t); | ||
247 | #endif | ||
248 | |||
249 | #ifdef NO_STRTOUMAX | ||
250 | #define strtoumax gitstrtoumax | ||
251 | extern uintmax_t gitstrtoumax(const char *, char **, int); | ||
252 | #endif | ||
253 | |||
254 | #ifdef NO_HSTRERROR | ||
255 | #define hstrerror githstrerror | ||
256 | extern const char *githstrerror(int herror); | ||
257 | #endif | ||
258 | |||
259 | #ifdef NO_MEMMEM | ||
260 | #define memmem gitmemmem | ||
261 | void *gitmemmem(const void *haystack, size_t haystacklen, | ||
262 | const void *needle, size_t needlelen); | ||
263 | #endif | ||
264 | |||
265 | #ifdef FREAD_READS_DIRECTORIES | ||
266 | #ifdef fopen | ||
267 | #undef fopen | ||
268 | #endif | ||
269 | #define fopen(a,b) git_fopen(a,b) | ||
270 | extern FILE *git_fopen(const char*, const char*); | ||
271 | #endif | ||
272 | |||
273 | #ifdef SNPRINTF_RETURNS_BOGUS | ||
274 | #define snprintf git_snprintf | ||
275 | extern int git_snprintf(char *str, size_t maxsize, | ||
276 | const char *format, ...); | ||
277 | #define vsnprintf git_vsnprintf | ||
278 | extern int git_vsnprintf(char *str, size_t maxsize, | ||
279 | const char *format, va_list ap); | ||
280 | #endif | ||
281 | |||
282 | #ifdef __GLIBC_PREREQ | ||
283 | #if __GLIBC_PREREQ(2, 1) | ||
284 | #define HAVE_STRCHRNUL | ||
285 | #endif | ||
286 | #endif | ||
287 | |||
288 | #ifndef HAVE_STRCHRNUL | ||
289 | #define strchrnul gitstrchrnul | ||
/*
 * Fallback for strchrnul(): return a pointer to the first occurrence of
 * "c" in "s", or to the terminating NUL of "s" when "c" does not occur.
 */
static inline char *gitstrchrnul(const char *s, int c)
{
	/*
	 * Compare as char, the way strchr() does.  Without the conversion a
	 * byte >= 0x80 passed as a positive int (e.g. 0xE9) can never equal
	 * a negative signed char and the search runs to the end.
	 */
	const char wanted = (char)c;

	while (*s && *s != wanted)
		s++;
	return (char *)s;
}
296 | #endif | ||
297 | |||
298 | /* | ||
299 | * Wrappers: | ||
300 | */ | ||
301 | extern char *xstrdup(const char *str); | ||
302 | extern void *xmalloc(size_t size); | ||
303 | extern void *xmemdupz(const void *data, size_t len); | ||
304 | extern char *xstrndup(const char *str, size_t len); | ||
305 | extern void *xrealloc(void *ptr, size_t size); | ||
306 | extern void *xcalloc(size_t nmemb, size_t size); | ||
307 | extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); | ||
308 | extern ssize_t xread(int fd, void *buf, size_t len); | ||
309 | extern ssize_t xwrite(int fd, const void *buf, size_t len); | ||
310 | extern int xdup(int fd); | ||
311 | extern FILE *xfdopen(int fd, const char *mode); | ||
312 | extern int xmkstemp(char *template); | ||
313 | |||
/* Convert an off_t length to size_t with a plain cast; no range check. */
static inline size_t xsize_t(off_t len)
{
	size_t as_size = (size_t)len;

	return as_size;
}
318 | |||
/*
 * Return 1 when "filename" is strictly longer than "ext" and ends with
 * "ext", 0 otherwise.
 */
static inline int has_extension(const char *filename, const char *ext)
{
	size_t name_len = strlen(filename);
	size_t ext_len = strlen(ext);

	if (name_len <= ext_len)
		return 0;
	return memcmp(filename + (name_len - ext_len), ext, ext_len) == 0;
}
325 | |||
326 | /* Sane ctype - no locale, and works with signed chars */ | ||
327 | #undef isascii | ||
328 | #undef isspace | ||
329 | #undef isdigit | ||
330 | #undef isalpha | ||
331 | #undef isalnum | ||
332 | #undef tolower | ||
333 | #undef toupper | ||
334 | extern unsigned char sane_ctype[256]; | ||
335 | #define GIT_SPACE 0x01 | ||
336 | #define GIT_DIGIT 0x02 | ||
337 | #define GIT_ALPHA 0x04 | ||
338 | #define GIT_GLOB_SPECIAL 0x08 | ||
339 | #define GIT_REGEX_SPECIAL 0x10 | ||
340 | #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) | ||
341 | #define isascii(x) (((x) & ~0x7f) == 0) | ||
342 | #define isspace(x) sane_istest(x,GIT_SPACE) | ||
343 | #define isdigit(x) sane_istest(x,GIT_DIGIT) | ||
344 | #define isalpha(x) sane_istest(x,GIT_ALPHA) | ||
345 | #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) | ||
346 | #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) | ||
347 | #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) | ||
348 | #define tolower(x) sane_case((unsigned char)(x), 0x20) | ||
349 | #define toupper(x) sane_case((unsigned char)(x), 0) | ||
350 | |||
351 | static inline int sane_case(int x, int high) | ||
352 | { | ||
353 | if (sane_istest(x, GIT_ALPHA)) | ||
354 | x = (x & ~0x20) | high; | ||
355 | return x; | ||
356 | } | ||
357 | |||
/*
 * Parse all of "s" as an unsigned integer in the given base.
 *
 * On success store the value in *result and return 0.  Return -1 (leaving
 * *result untouched) when "s" is empty, has trailing characters, contains
 * a minus sign, makes strtoul() set errno, or does not fit in an
 * unsigned int.
 */
static inline int strtoul_ui(char const *s, int base, unsigned int *result)
{
	unsigned long ul;
	char *p;

	/*
	 * strtoul() accepts a leading '-' and returns the negated value, so
	 * e.g. "-18446744073709551615" would parse as 1 where unsigned long
	 * is 64 bits wide.  Reject negative input up front.
	 */
	if (strchr(s, '-'))
		return -1;

	errno = 0;
	ul = strtoul(s, &p, base);
	if (errno || *p || p == s || (unsigned int) ul != ul)
		return -1;
	*result = ul;
	return 0;
}
370 | |||
/*
 * Parse all of "s" as a signed integer in the given base.
 *
 * On success store the value in *result and return 0.  Return -1 when
 * strtol() sets errno, when nothing was consumed, when trailing characters
 * remain, or when the value does not fit in an int.
 */
static inline int strtol_i(char const *s, int base, int *result)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(s, &end, base);
	if (errno)
		return -1;
	if (end == s || *end)
		return -1;
	if ((int) value != value)
		return -1;
	*result = value;
	return 0;
}
383 | |||
384 | #ifdef INTERNAL_QSORT | ||
385 | void git_qsort(void *base, size_t nmemb, size_t size, | ||
386 | int(*compar)(const void *, const void *)); | ||
387 | #define qsort git_qsort | ||
388 | #endif | ||
389 | |||
390 | #ifndef DIR_HAS_BSD_GROUP_SEMANTICS | ||
391 | # define FORCE_DIR_SET_GID S_ISGID | ||
392 | #else | ||
393 | # define FORCE_DIR_SET_GID 0 | ||
394 | #endif | ||
395 | |||
396 | #ifdef NO_NSEC | ||
397 | #undef USE_NSEC | ||
398 | #define ST_CTIME_NSEC(st) 0 | ||
399 | #define ST_MTIME_NSEC(st) 0 | ||
400 | #else | ||
401 | #ifdef USE_ST_TIMESPEC | ||
402 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) | ||
403 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) | ||
404 | #else | ||
405 | #define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) | ||
406 | #define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) | ||
407 | #endif | ||
408 | #endif | ||
409 | |||
410 | #endif | ||
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c new file mode 100644 index 000000000000..6350d65f6d9e --- /dev/null +++ b/tools/perf/util/wrapper.c | |||
@@ -0,0 +1,206 @@ | |||
1 | /* | ||
2 | * Various trivial helper wrappers around standard functions | ||
3 | */ | ||
4 | #include "cache.h" | ||
5 | |||
/*
 * There is no pack memory to release here; this empty stub only exists
 * so the wrappers below can stay close to the Git version.
 */
static inline void release_pack_memory(size_t size, int flag)
{
	/* Deliberately a no-op. */
	(void)size;
	(void)flag;
}
13 | |||
/*
 * strdup() that retries once after release_pack_memory() and calls
 * die() when the second attempt fails too.
 */
char *xstrdup(const char *str)
{
	char *copy = strdup(str);

	if (copy)
		return copy;

	/* First attempt failed: release what we can and try once more. */
	release_pack_memory(strlen(str) + 1, -1);
	copy = strdup(str);
	if (!copy)
		die("Out of memory, strdup failed");
	return copy;
}
25 | |||
/*
 * malloc() wrapper.  A NULL result for a zero-byte request is retried as a
 * one-byte request.  If allocation still fails, release_pack_memory() is
 * called and the same sequence is tried once more before die().
 * With XMALLOC_POISON defined the returned block is filled with 0xA5.
 */
void *xmalloc(size_t size)
{
	void *mem = NULL;
	int attempt;

	for (attempt = 0; attempt < 2 && !mem; attempt++) {
		if (attempt)
			release_pack_memory(size, -1);
		mem = malloc(size);
		if (!mem && !size)
			mem = malloc(1);
	}
	if (!mem)
		die("Out of memory, malloc failed");
#ifdef XMALLOC_POISON
	memset(mem, 0xA5, size);
#endif
	return mem;
}
44 | |||
/*
 * xmemdupz() obtains (len + 1) bytes from xmalloc(), copies "len" bytes of
 * "data" into them, NUL-terminates the copy and returns it.  Allocation
 * failure is handled by xmalloc(), which dies.
 */
void *xmemdupz(const void *data, size_t len)
{
	char *copy = xmalloc(len + 1);

	copy[len] = '\0';
	memcpy(copy, data, len);
	return copy;
}
58 | |||
/*
 * Duplicate at most "len" bytes of "str", stopping early at the first NUL
 * found within those bytes; the copy is NUL-terminated by xmemdupz().
 */
char *xstrndup(const char *str, size_t len)
{
	const char *nul = memchr(str, '\0', len);
	size_t copy_len = len;

	if (nul)
		copy_len = nul - str;
	return xmemdupz(str, copy_len);
}
64 | |||
/*
 * realloc() wrapper that never returns NULL: on failure it calls
 * release_pack_memory(), retries once, and die()s if that fails as well.
 *
 * A request for 0 bytes is turned into a request for 1 byte.  realloc()
 * with size 0 may free "ptr" and return NULL; retrying with the same
 * "ptr" afterwards would then operate on freed memory.  Asking for one
 * byte keeps "ptr" valid whenever realloc() reports failure.
 */
void *xrealloc(void *ptr, size_t size)
{
	size_t want = size ? size : 1;
	void *ret = realloc(ptr, want);

	if (!ret) {
		/* "ptr" is still valid here, so a retry is safe. */
		release_pack_memory(want, -1);
		ret = realloc(ptr, want);
		if (!ret)
			die("Out of memory, realloc failed");
	}
	return ret;
}
80 | |||
/*
 * calloc() wrapper.  A NULL result for an empty request (nmemb or size is
 * zero) is retried as calloc(1, 1).  If allocation still fails,
 * release_pack_memory() is called and the sequence is tried once more
 * before die().
 */
void *xcalloc(size_t nmemb, size_t size)
{
	const int empty_request = !nmemb || !size;
	void *mem = NULL;
	int attempt;

	for (attempt = 0; attempt < 2 && !mem; attempt++) {
		if (attempt)
			release_pack_memory(nmemb * size, -1);
		mem = calloc(nmemb, size);
		if (!mem && empty_request)
			mem = calloc(1, 1);
	}
	if (!mem)
		die("Out of memory, calloc failed");
	return mem;
}
96 | |||
/*
 * mmap() wrapper.  A failed zero-length mapping yields NULL.  Any other
 * failure triggers release_pack_memory() and one retry; if the retry
 * fails too, die() reports strerror(errno).
 */
void *xmmap(void *start, size_t length,
	int prot, int flags, int fd, off_t offset)
{
	void *map = mmap(start, length, prot, flags, fd, offset);

	if (map != MAP_FAILED)
		return map;
	if (length == 0)
		return NULL;

	release_pack_memory(length, fd);
	map = mmap(start, length, prot, flags, fd, offset);
	if (map == MAP_FAILED)
		die("Out of memory? mmap failed: %s", strerror(errno));
	return map;
}
111 | |||
/*
 * xread() is the same as read(), except that it automatically restarts
 * the call when it fails with a recoverable error (EAGAIN or EINTR).
 * xread() DOES NOT GUARANTEE that "len" bytes are read even if the data
 * is available.
 */
ssize_t xread(int fd, void *buf, size_t len)
{
	ssize_t got;

	do {
		got = read(fd, buf, len);
	} while (got < 0 && (errno == EAGAIN || errno == EINTR));

	return got;
}
127 | |||
/*
 * xwrite() is the same as write(), except that it automatically restarts
 * the call when it fails with a recoverable error (EAGAIN or EINTR).
 * xwrite() DOES NOT GUARANTEE that "len" bytes are written even if the
 * operation is successful.
 */
ssize_t xwrite(int fd, const void *buf, size_t len)
{
	ssize_t put;

	do {
		put = write(fd, buf, len);
	} while (put < 0 && (errno == EAGAIN || errno == EINTR));

	return put;
}
143 | |||
/*
 * Call xread() repeatedly until "count" bytes have been stored in "buf"
 * or xread() returns 0 or a negative value.  Returns the number of bytes
 * read so far if that is non-zero; otherwise returns xread()'s last result.
 */
ssize_t read_in_full(int fd, void *buf, size_t count)
{
	ssize_t done = 0;

	while ((size_t)done < count) {
		ssize_t got = xread(fd, (char *)buf + done, count - done);

		if (got <= 0)
			return done ? done : got;
		done += got;
	}

	return done;
}
160 | |||
/*
 * Call xwrite() repeatedly until all "count" bytes of "buf" are written.
 * Returns the total written on success.  Returns -1 when xwrite() fails;
 * a zero-byte write also returns -1, with errno set to ENOSPC.
 */
ssize_t write_in_full(int fd, const void *buf, size_t count)
{
	ssize_t done = 0;

	while ((size_t)done < count) {
		ssize_t put = xwrite(fd, (const char *)buf + done, count - done);

		if (put < 0)
			return -1;
		if (put == 0) {
			/* No progress was made: report it as "no space left". */
			errno = ENOSPC;
			return -1;
		}
		done += put;
	}

	return done;
}
181 | |||
/* dup() wrapper: calls die() with strerror(errno) when dup() fails. */
int xdup(int fd)
{
	int copy = dup(fd);

	if (copy < 0)
		die("dup failed: %s", strerror(errno));

	return copy;
}
189 | |||
/* fdopen() wrapper: calls die() with strerror(errno) on a NULL stream. */
FILE *xfdopen(int fd, const char *mode)
{
	FILE *fp = fdopen(fd, mode);

	if (!fp)
		die("Out of memory? fdopen failed: %s", strerror(errno));

	return fp;
}
197 | |||
/*
 * mkstemp() wrapper: returns the descriptor from mkstemp(template), or
 * calls die() with strerror(errno) when mkstemp() fails.
 */
int xmkstemp(char *template)
{
	int tmp_fd = mkstemp(template);

	if (tmp_fd < 0)
		die("Unable to create temporary file: %s", strerror(errno));

	return tmp_fd;
}