diff options
-rw-r--r-- | arch/x86/Kconfig.cpu | 1 | ||||
-rw-r--r-- | arch/x86/Kconfig.debug | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/ds.h | 82 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 35 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/ds.c | 921 | ||||
-rw-r--r-- | arch/x86/kernel/ds_selftest.c | 408 | ||||
-rw-r--r-- | arch/x86/kernel/ds_selftest.h | 15 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 284 | ||||
-rw-r--r-- | include/linux/mm.h | 7 | ||||
-rw-r--r-- | include/linux/ptrace.h | 10 | ||||
-rw-r--r-- | include/linux/sched.h | 13 | ||||
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/fork.c | 4 | ||||
-rw-r--r-- | kernel/ptrace.c | 10 | ||||
-rw-r--r-- | kernel/sched.c | 43 | ||||
-rw-r--r-- | kernel/trace/Makefile | 7 | ||||
-rw-r--r-- | kernel/trace/trace.h | 2 | ||||
-rw-r--r-- | kernel/trace/trace_hw_branches.c | 199 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 58 | ||||
-rw-r--r-- | mm/mlock.c | 51 |
25 files changed, 1616 insertions, 565 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8130334329c0..924e156a85ab 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -506,7 +506,6 @@ config X86_PTRACE_BTS | |||
506 | bool "Branch Trace Store" | 506 | bool "Branch Trace Store" |
507 | default y | 507 | default y |
508 | depends on X86_DEBUGCTLMSR | 508 | depends on X86_DEBUGCTLMSR |
509 | depends on BROKEN | ||
510 | ---help--- | 509 | ---help--- |
511 | This adds a ptrace interface to the hardware's branch trace store. | 510 | This adds a ptrace interface to the hardware's branch trace store. |
512 | 511 | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index d8359e73317f..22b752e09487 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -167,6 +167,15 @@ config IOMMU_LEAK | |||
167 | Add a simple leak tracer to the IOMMU code. This is useful when you | 167 | Add a simple leak tracer to the IOMMU code. This is useful when you |
168 | are debugging a buggy device driver that leaks IOMMU mappings. | 168 | are debugging a buggy device driver that leaks IOMMU mappings. |
169 | 169 | ||
170 | config X86_DS_SELFTEST | ||
171 | bool "DS selftest" | ||
172 | default y | ||
173 | depends on DEBUG_KERNEL | ||
174 | depends on X86_DS | ||
175 | ---help--- | ||
176 | Perform Debug Store selftests at boot time. | ||
177 | If in doubt, say "N". | ||
178 | |||
170 | config HAVE_MMIOTRACE_SUPPORT | 179 | config HAVE_MMIOTRACE_SUPPORT |
171 | def_bool y | 180 | def_bool y |
172 | 181 | ||
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672ba100c..70dac199b093 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h | |||
@@ -15,8 +15,8 @@ | |||
15 | * - buffer allocation (memory accounting) | 15 | * - buffer allocation (memory accounting) |
16 | * | 16 | * |
17 | * | 17 | * |
18 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2009 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #ifndef _ASM_X86_DS_H | 22 | #ifndef _ASM_X86_DS_H |
@@ -83,8 +83,10 @@ enum ds_feature { | |||
83 | * The interrupt threshold is independent from the overflow callback | 83 | * The interrupt threshold is independent from the overflow callback |
84 | * to allow users to use their own overflow interrupt handling mechanism. | 84 | * to allow users to use their own overflow interrupt handling mechanism. |
85 | * | 85 | * |
86 | * task: the task to request recording for; | 86 | * The function might sleep. |
87 | * NULL for per-cpu recording on the current cpu | 87 | * |
88 | * task: the task to request recording for | ||
89 | * cpu: the cpu to request recording for | ||
88 | * base: the base pointer for the (non-pageable) buffer; | 90 | * base: the base pointer for the (non-pageable) buffer; |
89 | * size: the size of the provided buffer in bytes | 91 | * size: the size of the provided buffer in bytes |
90 | * ovfl: pointer to a function to be called on buffer overflow; | 92 | * ovfl: pointer to a function to be called on buffer overflow; |
@@ -93,19 +95,28 @@ enum ds_feature { | |||
93 | * -1 if no interrupt threshold is requested. | 95 | * -1 if no interrupt threshold is requested. |
94 | * flags: a bit-mask of the above flags | 96 | * flags: a bit-mask of the above flags |
95 | */ | 97 | */ |
96 | extern struct bts_tracer *ds_request_bts(struct task_struct *task, | 98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
97 | void *base, size_t size, | 99 | void *base, size_t size, |
98 | bts_ovfl_callback_t ovfl, | 100 | bts_ovfl_callback_t ovfl, |
99 | size_t th, unsigned int flags); | 101 | size_t th, unsigned int flags); |
100 | extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, |
101 | void *base, size_t size, | 103 | bts_ovfl_callback_t ovfl, |
102 | pebs_ovfl_callback_t ovfl, | 104 | size_t th, unsigned int flags); |
103 | size_t th, unsigned int flags); | 105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
106 | void *base, size_t size, | ||
107 | pebs_ovfl_callback_t ovfl, | ||
108 | size_t th, unsigned int flags); | ||
109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
110 | void *base, size_t size, | ||
111 | pebs_ovfl_callback_t ovfl, | ||
112 | size_t th, unsigned int flags); | ||
104 | 113 | ||
105 | /* | 114 | /* |
106 | * Release BTS or PEBS resources | 115 | * Release BTS or PEBS resources |
107 | * Suspend and resume BTS or PEBS tracing | 116 | * Suspend and resume BTS or PEBS tracing |
108 | * | 117 | * |
118 | * Must be called with irq's enabled. | ||
119 | * | ||
109 | * tracer: the tracer handle returned from ds_request_~() | 120 | * tracer: the tracer handle returned from ds_request_~() |
110 | */ | 121 | */ |
111 | extern void ds_release_bts(struct bts_tracer *tracer); | 122 | extern void ds_release_bts(struct bts_tracer *tracer); |
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); | |||
115 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | 126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); |
116 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | 127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); |
117 | 128 | ||
129 | /* | ||
130 | * Release BTS or PEBS resources | ||
131 | * Suspend and resume BTS or PEBS tracing | ||
132 | * | ||
133 | * Cpu tracers must call this on the traced cpu. | ||
134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
135 | * | ||
136 | * May be called with irq's disabled. | ||
137 | * | ||
138 | * Returns 0 if successful; | ||
139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
140 | * -EPERM if the task tracer does not trace itself. | ||
141 | * | ||
142 | * tracer: the tracer handle returned from ds_request_~() | ||
143 | */ | ||
144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
150 | |||
118 | 151 | ||
119 | /* | 152 | /* |
120 | * The raw DS buffer state as it is used for BTS and PEBS recording. | 153 | * The raw DS buffer state as it is used for BTS and PEBS recording. |
@@ -170,9 +203,9 @@ struct bts_struct { | |||
170 | } lbr; | 203 | } lbr; |
171 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | 204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ |
172 | struct { | 205 | struct { |
173 | __u64 jiffies; | 206 | __u64 clock; |
174 | pid_t pid; | 207 | pid_t pid; |
175 | } timestamp; | 208 | } event; |
176 | } variant; | 209 | } variant; |
177 | }; | 210 | }; |
178 | 211 | ||
@@ -201,8 +234,12 @@ struct bts_trace { | |||
201 | struct pebs_trace { | 234 | struct pebs_trace { |
202 | struct ds_trace ds; | 235 | struct ds_trace ds; |
203 | 236 | ||
204 | /* the PEBS reset value */ | 237 | /* the number of valid counters in the below array */ |
205 | unsigned long long reset_value; | 238 | unsigned int counters; |
239 | |||
240 | #define MAX_PEBS_COUNTERS 4 | ||
241 | /* the counter reset value */ | ||
242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
206 | }; | 243 | }; |
207 | 244 | ||
208 | 245 | ||
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); | |||
237 | * Returns 0 on success; -Eerrno on error | 274 | * Returns 0 on success; -Eerrno on error |
238 | * | 275 | * |
239 | * tracer: the tracer handle returned from ds_request_pebs() | 276 | * tracer: the tracer handle returned from ds_request_pebs() |
277 | * counter: the index of the counter | ||
240 | * value: the new counter reset value | 278 | * value: the new counter reset value |
241 | */ | 279 | */ |
242 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); | 280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, |
281 | unsigned int counter, u64 value); | ||
243 | 282 | ||
244 | /* | 283 | /* |
245 | * Initialization | 284 | * Initialization |
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | |||
252 | */ | 291 | */ |
253 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | 292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); |
254 | 293 | ||
255 | /* | ||
256 | * Task clone/init and cleanup work | ||
257 | */ | ||
258 | extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); | ||
259 | extern void ds_exit_thread(struct task_struct *tsk); | ||
260 | |||
261 | #else /* CONFIG_X86_DS */ | 294 | #else /* CONFIG_X86_DS */ |
262 | 295 | ||
263 | struct cpuinfo_x86; | 296 | struct cpuinfo_x86; |
264 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | 297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} |
265 | static inline void ds_switch_to(struct task_struct *prev, | 298 | static inline void ds_switch_to(struct task_struct *prev, |
266 | struct task_struct *next) {} | 299 | struct task_struct *next) {} |
267 | static inline void ds_copy_thread(struct task_struct *tsk, | ||
268 | struct task_struct *father) {} | ||
269 | static inline void ds_exit_thread(struct task_struct *tsk) {} | ||
270 | 300 | ||
271 | #endif /* CONFIG_X86_DS */ | 301 | #endif /* CONFIG_X86_DS */ |
272 | #endif /* _ASM_X86_DS_H */ | 302 | #endif /* _ASM_X86_DS_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2cceae709c8..0b2fab0051e0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -460,14 +460,8 @@ struct thread_struct { | |||
460 | unsigned io_bitmap_max; | 460 | unsigned io_bitmap_max; |
461 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | 461 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ |
462 | unsigned long debugctlmsr; | 462 | unsigned long debugctlmsr; |
463 | #ifdef CONFIG_X86_DS | 463 | /* Debug Store context; see asm/ds.h */ |
464 | /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ | ||
465 | struct ds_context *ds_ctx; | 464 | struct ds_context *ds_ctx; |
466 | #endif /* CONFIG_X86_DS */ | ||
467 | #ifdef CONFIG_X86_PTRACE_BTS | ||
468 | /* the signal to send on a bts buffer overflow */ | ||
469 | unsigned int bts_ovfl_signal; | ||
470 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
471 | }; | 465 | }; |
472 | 466 | ||
473 | static inline unsigned long native_get_debugreg(int regno) | 467 | static inline unsigned long native_get_debugreg(int regno) |
@@ -795,6 +789,21 @@ static inline unsigned long get_debugctlmsr(void) | |||
795 | return debugctlmsr; | 789 | return debugctlmsr; |
796 | } | 790 | } |
797 | 791 | ||
792 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
793 | { | ||
794 | u64 debugctlmsr = 0; | ||
795 | u32 val1, val2; | ||
796 | |||
797 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
798 | if (boot_cpu_data.x86 < 6) | ||
799 | return 0; | ||
800 | #endif | ||
801 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
802 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
803 | |||
804 | return debugctlmsr; | ||
805 | } | ||
806 | |||
798 | static inline void update_debugctlmsr(unsigned long debugctlmsr) | 807 | static inline void update_debugctlmsr(unsigned long debugctlmsr) |
799 | { | 808 | { |
800 | #ifndef CONFIG_X86_DEBUGCTLMSR | 809 | #ifndef CONFIG_X86_DEBUGCTLMSR |
@@ -804,6 +813,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
804 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 813 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
805 | } | 814 | } |
806 | 815 | ||
816 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
817 | unsigned long debugctlmsr) | ||
818 | { | ||
819 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
820 | if (boot_cpu_data.x86 < 6) | ||
821 | return; | ||
822 | #endif | ||
823 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
824 | (u32)((u64)debugctlmsr), | ||
825 | (u32)((u64)debugctlmsr >> 32)); | ||
826 | } | ||
827 | |||
807 | /* | 828 | /* |
808 | * from system description table in BIOS. Mostly for MCA use, but | 829 | * from system description table in BIOS. Mostly for MCA use, but |
809 | * others may find it useful: | 830 | * others may find it useful: |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index e304b66abeea..5cdd19f20b5b 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
235 | extern int do_set_thread_area(struct task_struct *p, int idx, | 235 | extern int do_set_thread_area(struct task_struct *p, int idx, |
236 | struct user_desc __user *info, int can_allocate); | 236 | struct user_desc __user *info, int can_allocate); |
237 | 237 | ||
238 | extern void x86_ptrace_untrace(struct task_struct *); | 238 | #ifdef CONFIG_X86_PTRACE_BTS |
239 | extern void x86_ptrace_fork(struct task_struct *child, | 239 | extern void ptrace_bts_untrace(struct task_struct *tsk); |
240 | unsigned long clone_flags); | ||
241 | 240 | ||
242 | #define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) | 241 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) |
243 | #define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) | 242 | #endif /* CONFIG_X86_PTRACE_BTS */ |
244 | 243 | ||
245 | #endif /* __KERNEL__ */ | 244 | #endif /* __KERNEL__ */ |
246 | 245 | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 145cce75cda7..77df4d654ff9 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -44,6 +44,7 @@ obj-y += process.o | |||
44 | obj-y += i387.o xsave.o | 44 | obj-y += i387.o xsave.o |
45 | obj-y += ptrace.o | 45 | obj-y += ptrace.o |
46 | obj-$(CONFIG_X86_DS) += ds.o | 46 | obj-$(CONFIG_X86_DS) += ds.o |
47 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
47 | obj-$(CONFIG_X86_32) += tls.o | 48 | obj-$(CONFIG_X86_32) += tls.o |
48 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 49 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
49 | obj-y += step.o | 50 | obj-y += step.o |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 87b67e3a765a..48bfe1386038 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -19,45 +19,61 @@ | |||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | 22 | #include <linux/kernel.h> | |
23 | #include <asm/ds.h> | ||
24 | |||
25 | #include <linux/errno.h> | ||
26 | #include <linux/string.h> | 23 | #include <linux/string.h> |
27 | #include <linux/slab.h> | 24 | #include <linux/errno.h> |
28 | #include <linux/sched.h> | 25 | #include <linux/sched.h> |
26 | #include <linux/slab.h> | ||
29 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | 28 | #include <linux/trace_clock.h> |
29 | |||
30 | #include <asm/ds.h> | ||
31 | 31 | ||
32 | #include "ds_selftest.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * The configuration for a particular DS hardware implementation. | 35 | * The configuration for a particular DS hardware implementation: |
35 | */ | 36 | */ |
36 | struct ds_configuration { | 37 | struct ds_configuration { |
37 | /* the name of the configuration */ | 38 | /* The name of the configuration: */ |
38 | const char *name; | 39 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure and | 40 | |
40 | in the BTS and PEBS buffers in bytes; | 41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ |
41 | this covers the first 8 DS fields related to buffer management. */ | 42 | unsigned char sizeof_ptr_field; |
42 | unsigned char sizeof_field; | 43 | |
43 | /* the size of a BTS/PEBS record in bytes */ | 44 | /* The size of a BTS/PEBS record in bytes: */ |
44 | unsigned char sizeof_rec[2]; | 45 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | 46 | |
46 | * by enum ds_feature */ | 47 | /* The number of pebs counter reset values in the DS structure. */ |
47 | unsigned long ctl[dsf_ctl_max]; | 48 | unsigned char nr_counter_reset; |
49 | |||
50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
51 | unsigned long ctl[dsf_ctl_max]; | ||
48 | }; | 52 | }; |
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | 53 | static struct ds_configuration ds_cfg __read_mostly; |
54 | |||
55 | |||
56 | /* Maximal size of a DS configuration: */ | ||
57 | #define MAX_SIZEOF_DS 0x80 | ||
50 | 58 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | 59 | /* Maximal size of a BTS record: */ |
60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
52 | 61 | ||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | 62 | /* BTS and PEBS buffer alignment: */ |
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | 63 | #define DS_ALIGNMENT (1 << 3) |
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | 64 | ||
57 | #define BTS_CONTROL \ | 65 | /* Number of buffer pointers in DS: */ |
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | 66 | #define NUM_DS_PTR_FIELDS 8 |
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | 67 | ||
68 | /* Size of a pebs reset value in DS: */ | ||
69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
70 | |||
71 | /* Mask of control bits in the DS MSR register: */ | ||
72 | #define BTS_CONTROL \ | ||
73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
61 | 77 | ||
62 | /* | 78 | /* |
63 | * A BTS or PEBS tracer. | 79 | * A BTS or PEBS tracer. |
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | |||
66 | * to identify tracers. | 82 | * to identify tracers. |
67 | */ | 83 | */ |
68 | struct ds_tracer { | 84 | struct ds_tracer { |
69 | /* the DS context (partially) owned by this tracer */ | 85 | /* The DS context (partially) owned by this tracer. */ |
70 | struct ds_context *context; | 86 | struct ds_context *context; |
71 | /* the buffer provided on ds_request() and its size in bytes */ | 87 | /* The buffer provided on ds_request() and its size in bytes. */ |
72 | void *buffer; | 88 | void *buffer; |
73 | size_t size; | 89 | size_t size; |
74 | }; | 90 | }; |
75 | 91 | ||
76 | struct bts_tracer { | 92 | struct bts_tracer { |
77 | /* the common DS part */ | 93 | /* The common DS part: */ |
78 | struct ds_tracer ds; | 94 | struct ds_tracer ds; |
79 | /* the trace including the DS configuration */ | 95 | |
80 | struct bts_trace trace; | 96 | /* The trace including the DS configuration: */ |
81 | /* buffer overflow notification function */ | 97 | struct bts_trace trace; |
82 | bts_ovfl_callback_t ovfl; | 98 | |
99 | /* Buffer overflow notification function: */ | ||
100 | bts_ovfl_callback_t ovfl; | ||
101 | |||
102 | /* Active flags affecting trace collection. */ | ||
103 | unsigned int flags; | ||
83 | }; | 104 | }; |
84 | 105 | ||
85 | struct pebs_tracer { | 106 | struct pebs_tracer { |
86 | /* the common DS part */ | 107 | /* The common DS part: */ |
87 | struct ds_tracer ds; | 108 | struct ds_tracer ds; |
88 | /* the trace including the DS configuration */ | 109 | |
89 | struct pebs_trace trace; | 110 | /* The trace including the DS configuration: */ |
90 | /* buffer overflow notification function */ | 111 | struct pebs_trace trace; |
91 | pebs_ovfl_callback_t ovfl; | 112 | |
113 | /* Buffer overflow notification function: */ | ||
114 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | 115 | }; |
93 | 116 | ||
94 | /* | 117 | /* |
@@ -97,6 +120,7 @@ struct pebs_tracer { | |||
97 | * | 120 | * |
98 | * The DS configuration consists of the following fields; different | 121 | * The DS configuration consists of the following fields; different |
99 | * architectures vary in the size of those fields. | 122 | * architectures vary in the size of those fields. |
123 | * | ||
100 | * - double-word aligned base linear address of the BTS buffer | 124 | * - double-word aligned base linear address of the BTS buffer |
101 | * - write pointer into the BTS buffer | 125 | * - write pointer into the BTS buffer |
102 | * - end linear address of the BTS buffer (one byte beyond the end of | 126 | * - end linear address of the BTS buffer (one byte beyond the end of |
@@ -135,21 +159,22 @@ enum ds_field { | |||
135 | }; | 159 | }; |
136 | 160 | ||
137 | enum ds_qualifier { | 161 | enum ds_qualifier { |
138 | ds_bts = 0, | 162 | ds_bts = 0, |
139 | ds_pebs | 163 | ds_pebs |
140 | }; | 164 | }; |
141 | 165 | ||
142 | static inline unsigned long ds_get(const unsigned char *base, | 166 | static inline unsigned long |
143 | enum ds_qualifier qual, enum ds_field field) | 167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) |
144 | { | 168 | { |
145 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
146 | return *(unsigned long *)base; | 170 | return *(unsigned long *)base; |
147 | } | 171 | } |
148 | 172 | ||
149 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | 173 | static inline void |
150 | enum ds_field field, unsigned long value) | 174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, |
175 | unsigned long value) | ||
151 | { | 176 | { |
152 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
153 | (*(unsigned long *)base) = value; | 178 | (*(unsigned long *)base) = value; |
154 | } | 179 | } |
155 | 180 | ||
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
159 | */ | 184 | */ |
160 | static DEFINE_SPINLOCK(ds_lock); | 185 | static DEFINE_SPINLOCK(ds_lock); |
161 | 186 | ||
162 | |||
163 | /* | 187 | /* |
164 | * We either support (system-wide) per-cpu or per-thread allocation. | 188 | * We either support (system-wide) per-cpu or per-thread allocation. |
165 | * We distinguish the two based on the task_struct pointer, where a | 189 | * We distinguish the two based on the task_struct pointer, where a |
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock); | |||
178 | */ | 202 | */ |
179 | static atomic_t tracers = ATOMIC_INIT(0); | 203 | static atomic_t tracers = ATOMIC_INIT(0); |
180 | 204 | ||
181 | static inline void get_tracer(struct task_struct *task) | 205 | static inline int get_tracer(struct task_struct *task) |
182 | { | 206 | { |
183 | if (task) | 207 | int error; |
208 | |||
209 | spin_lock_irq(&ds_lock); | ||
210 | |||
211 | if (task) { | ||
212 | error = -EPERM; | ||
213 | if (atomic_read(&tracers) < 0) | ||
214 | goto out; | ||
184 | atomic_inc(&tracers); | 215 | atomic_inc(&tracers); |
185 | else | 216 | } else { |
217 | error = -EPERM; | ||
218 | if (atomic_read(&tracers) > 0) | ||
219 | goto out; | ||
186 | atomic_dec(&tracers); | 220 | atomic_dec(&tracers); |
221 | } | ||
222 | |||
223 | error = 0; | ||
224 | out: | ||
225 | spin_unlock_irq(&ds_lock); | ||
226 | return error; | ||
187 | } | 227 | } |
188 | 228 | ||
189 | static inline void put_tracer(struct task_struct *task) | 229 | static inline void put_tracer(struct task_struct *task) |
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task) | |||
194 | atomic_inc(&tracers); | 234 | atomic_inc(&tracers); |
195 | } | 235 | } |
196 | 236 | ||
197 | static inline int check_tracer(struct task_struct *task) | ||
198 | { | ||
199 | return task ? | ||
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
202 | } | ||
203 | |||
204 | |||
205 | /* | 237 | /* |
206 | * The DS context is either attached to a thread or to a cpu: | 238 | * The DS context is either attached to a thread or to a cpu: |
207 | * - in the former case, the thread_struct contains a pointer to the | 239 | * - in the former case, the thread_struct contains a pointer to the |
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task) | |||
213 | * deallocated when the last user puts the context. | 245 | * deallocated when the last user puts the context. |
214 | */ | 246 | */ |
215 | struct ds_context { | 247 | struct ds_context { |
216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | 248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ |
217 | unsigned char ds[MAX_SIZEOF_DS]; | 249 | unsigned char ds[MAX_SIZEOF_DS]; |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | 250 | |
219 | struct bts_tracer *bts_master; | 251 | /* The owner of the BTS and PEBS configuration, respectively: */ |
220 | struct pebs_tracer *pebs_master; | 252 | struct bts_tracer *bts_master; |
221 | /* use count */ | 253 | struct pebs_tracer *pebs_master; |
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
230 | 254 | ||
231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); | 255 | /* Use count: */ |
256 | unsigned long count; | ||
232 | 257 | ||
233 | #define system_context per_cpu(system_context_array, smp_processor_id()) | 258 | /* Pointer to the context pointer field: */ |
259 | struct ds_context **this; | ||
260 | |||
261 | /* The traced task; NULL for cpu tracing: */ | ||
262 | struct task_struct *task; | ||
263 | |||
264 | /* The traced cpu; only valid if task is NULL: */ | ||
265 | int cpu; | ||
266 | }; | ||
234 | 267 | ||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_context); | ||
235 | 269 | ||
236 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 270 | |
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
237 | { | 272 | { |
238 | struct ds_context **p_context = | 273 | struct ds_context **p_context = |
239 | (task ? &task->thread.ds_ctx : &system_context); | 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); |
240 | struct ds_context *context = NULL; | 275 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | 276 | struct ds_context *new_context = NULL; |
242 | unsigned long irq; | ||
243 | 277 | ||
244 | /* Chances are small that we already have a context. */ | 278 | /* Chances are small that we already have a context. */ |
245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | 279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
246 | if (!new_context) | 280 | if (!new_context) |
247 | return NULL; | 281 | return NULL; |
248 | 282 | ||
249 | spin_lock_irqsave(&ds_lock, irq); | 283 | spin_lock_irq(&ds_lock); |
250 | 284 | ||
251 | context = *p_context; | 285 | context = *p_context; |
252 | if (!context) { | 286 | if (likely(!context)) { |
253 | context = new_context; | 287 | context = new_context; |
254 | 288 | ||
255 | context->this = p_context; | 289 | context->this = p_context; |
256 | context->task = task; | 290 | context->task = task; |
291 | context->cpu = cpu; | ||
257 | context->count = 0; | 292 | context->count = 0; |
258 | 293 | ||
259 | if (task) | ||
260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
261 | |||
262 | if (!task || (task == current)) | ||
263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); | ||
264 | |||
265 | *p_context = context; | 294 | *p_context = context; |
266 | } | 295 | } |
267 | 296 | ||
268 | context->count++; | 297 | context->count++; |
269 | 298 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | 299 | spin_unlock_irq(&ds_lock); |
271 | 300 | ||
272 | if (context != new_context) | 301 | if (context != new_context) |
273 | kfree(new_context); | 302 | kfree(new_context); |
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) | |||
275 | return context; | 304 | return context; |
276 | } | 305 | } |
277 | 306 | ||
278 | static inline void ds_put_context(struct ds_context *context) | 307 | static void ds_put_context(struct ds_context *context) |
279 | { | 308 | { |
309 | struct task_struct *task; | ||
280 | unsigned long irq; | 310 | unsigned long irq; |
281 | 311 | ||
282 | if (!context) | 312 | if (!context) |
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context) | |||
291 | 321 | ||
292 | *(context->this) = NULL; | 322 | *(context->this) = NULL; |
293 | 323 | ||
294 | if (context->task) | 324 | task = context->task; |
295 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | 325 | |
326 | if (task) | ||
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
296 | 328 | ||
297 | if (!context->task || (context->task == current)) | 329 | /* |
298 | wrmsrl(MSR_IA32_DS_AREA, 0); | 330 | * We leave the (now dangling) pointer to the DS configuration in |
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
332 | * NULL - the hardware would crash if we enabled tracing. | ||
333 | * | ||
334 | * This saves us some problems with having to write an msr on a | ||
335 | * different cpu while preventing others from doing the same for the | ||
336 | * next context for that same cpu. | ||
337 | */ | ||
299 | 338 | ||
300 | spin_unlock_irqrestore(&ds_lock, irq); | 339 | spin_unlock_irqrestore(&ds_lock, irq); |
301 | 340 | ||
341 | /* The context might still be in use for context switching. */ | ||
342 | if (task && (task != current)) | ||
343 | wait_task_context_switch(task); | ||
344 | |||
302 | kfree(context); | 345 | kfree(context); |
303 | } | 346 | } |
304 | 347 | ||
348 | static void ds_install_ds_area(struct ds_context *context) | ||
349 | { | ||
350 | unsigned long ds; | ||
351 | |||
352 | ds = (unsigned long)context->ds; | ||
353 | |||
354 | /* | ||
355 | * There is a race between the bts master and the pebs master. | ||
356 | * | ||
357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
359 | * | ||
360 | * If bts and pebs are collected for the same task or same cpu, | ||
361 | * the same configuration is written twice. | ||
362 | */ | ||
363 | if (context->task) { | ||
364 | get_cpu(); | ||
365 | if (context->task == current) | ||
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
368 | put_cpu(); | ||
369 | } else | ||
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
372 | } | ||
305 | 373 | ||
306 | /* | 374 | /* |
307 | * Call the tracer's callback on a buffer overflow. | 375 | * Call the tracer's callback on a buffer overflow. |
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | |||
332 | * The remainder of any partially written record is zeroed out. | 400 | * The remainder of any partially written record is zeroed out. |
333 | * | 401 | * |
334 | * context: the DS context | 402 | * context: the DS context |
335 | * qual: the buffer type | 403 | * qual: the buffer type |
336 | * record: the data to write | 404 | * record: the data to write |
337 | * size: the size of the data | 405 | * size: the size of the data |
338 | */ | 406 | */ |
339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | 407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | 408 | const void *record, size_t size) |
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
349 | unsigned long write_size, adj_write_size; | 417 | unsigned long write_size, adj_write_size; |
350 | 418 | ||
351 | /* | 419 | /* |
352 | * write as much as possible without producing an | 420 | * Write as much as possible without producing an |
353 | * overflow interrupt. | 421 | * overflow interrupt. |
354 | * | 422 | * |
355 | * interrupt_threshold must either be | 423 | * Interrupt_threshold must either be |
356 | * - bigger than absolute_maximum or | 424 | * - bigger than absolute_maximum or |
357 | * - point to a record between buffer_base and absolute_maximum | 425 | * - point to a record between buffer_base and absolute_maximum |
358 | * | 426 | * |
359 | * index points to a valid record. | 427 | * Index points to a valid record. |
360 | */ | 428 | */ |
361 | base = ds_get(context->ds, qual, ds_buffer_base); | 429 | base = ds_get(context->ds, qual, ds_buffer_base); |
362 | index = ds_get(context->ds, qual, ds_index); | 430 | index = ds_get(context->ds, qual, ds_index); |
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
365 | 433 | ||
366 | write_end = min(end, int_th); | 434 | write_end = min(end, int_th); |
367 | 435 | ||
368 | /* if we are already beyond the interrupt threshold, | 436 | /* |
369 | * we fill the entire buffer */ | 437 | * If we are already beyond the interrupt threshold, |
438 | * we fill the entire buffer. | ||
439 | */ | ||
370 | if (write_end <= index) | 440 | if (write_end <= index) |
371 | write_end = end; | 441 | write_end = end; |
372 | 442 | ||
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; |
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; |
385 | 455 | ||
386 | /* zero out trailing bytes */ | 456 | /* Zero out trailing bytes. */ |
387 | memset((char *)index + write_size, 0, | 457 | memset((char *)index + write_size, 0, |
388 | adj_write_size - write_size); | 458 | adj_write_size - write_size); |
389 | index += adj_write_size; | 459 | index += adj_write_size; |
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | 480 | * Later architectures use 64bit pointers throughout, whereas earlier |
411 | * architectures use 32bit pointers in 32bit mode. | 481 | * architectures use 32bit pointers in 32bit mode. |
412 | * | 482 | * |
413 | * We compute the base address for the first 8 fields based on: | 483 | * We compute the base address for the fields based on: |
414 | * - the field size stored in the DS configuration | 484 | * - the field size stored in the DS configuration |
415 | * - the relative field position | 485 | * - the relative field position |
416 | * | 486 | * |
@@ -431,23 +501,23 @@ enum bts_field { | |||
431 | bts_to, | 501 | bts_to, |
432 | bts_flags, | 502 | bts_flags, |
433 | 503 | ||
434 | bts_qual = bts_from, | 504 | bts_qual = bts_from, |
435 | bts_jiffies = bts_to, | 505 | bts_clock = bts_to, |
436 | bts_pid = bts_flags, | 506 | bts_pid = bts_flags, |
437 | 507 | ||
438 | bts_qual_mask = (bts_qual_max - 1), | 508 | bts_qual_mask = (bts_qual_max - 1), |
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
440 | }; | 510 | }; |
441 | 511 | ||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | 512 | static inline unsigned long bts_get(const char *base, enum bts_field field) |
443 | { | 513 | { |
444 | base += (ds_cfg.sizeof_field * field); | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
445 | return *(unsigned long *)base; | 515 | return *(unsigned long *)base; |
446 | } | 516 | } |
447 | 517 | ||
448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | 518 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
449 | { | 519 | { |
450 | base += (ds_cfg.sizeof_field * field);; | 520 | base += (ds_cfg.sizeof_ptr_field * field);; |
451 | (*(unsigned long *)base) = val; | 521 | (*(unsigned long *)base) = val; |
452 | } | 522 | } |
453 | 523 | ||
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) | |||
463 | * | 533 | * |
464 | * return: bytes read/written on success; -Eerrno, otherwise | 534 | * return: bytes read/written on success; -Eerrno, otherwise |
465 | */ | 535 | */ |
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | 536 | static int |
467 | struct bts_struct *out) | 537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) |
468 | { | 538 | { |
469 | if (!tracer) | 539 | if (!tracer) |
470 | return -EINVAL; | 540 | return -EINVAL; |
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at, | |||
478 | memset(out, 0, sizeof(*out)); | 548 | memset(out, 0, sizeof(*out)); |
479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | 549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | 550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | 551 | out->variant.event.clock = bts_get(at, bts_clock); |
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | 552 | out->variant.event.pid = bts_get(at, bts_pid); |
483 | } else { | 553 | } else { |
484 | out->qualifier = bts_branch; | 554 | out->qualifier = bts_branch; |
485 | out->variant.lbr.from = bts_get(at, bts_from); | 555 | out->variant.lbr.from = bts_get(at, bts_from); |
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |||
516 | case bts_task_arrives: | 586 | case bts_task_arrives: |
517 | case bts_task_departs: | 587 | case bts_task_departs: |
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | 588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); |
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | 589 | bts_set(raw, bts_clock, in->variant.event.clock); |
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | 590 | bts_set(raw, bts_pid, in->variant.event.pid); |
521 | break; | 591 | break; |
522 | default: | 592 | default: |
523 | return -EINVAL; | 593 | return -EINVAL; |
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
555 | unsigned int flags) { | 625 | unsigned int flags) { |
556 | unsigned long buffer, adj; | 626 | unsigned long buffer, adj; |
557 | 627 | ||
558 | /* adjust the buffer address and size to meet alignment | 628 | /* |
629 | * Adjust the buffer address and size to meet alignment | ||
559 | * constraints: | 630 | * constraints: |
560 | * - buffer is double-word aligned | 631 | * - buffer is double-word aligned |
561 | * - size is multiple of record size | 632 | * - size is multiple of record size |
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
577 | trace->begin = (void *)buffer; | 648 | trace->begin = (void *)buffer; |
578 | trace->top = trace->begin; | 649 | trace->top = trace->begin; |
579 | trace->end = (void *)(buffer + size); | 650 | trace->end = (void *)(buffer + size); |
580 | /* The value for 'no threshold' is -1, which will set the | 651 | /* |
652 | * The value for 'no threshold' is -1, which will set the | ||
581 | * threshold outside of the buffer, just like we want it. | 653 | * threshold outside of the buffer, just like we want it. |
582 | */ | 654 | */ |
655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
583 | trace->ith = (void *)(buffer + size - ith); | 656 | trace->ith = (void *)(buffer + size - ith); |
584 | 657 | ||
585 | trace->flags = flags; | 658 | trace->flags = flags; |
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
588 | 661 | ||
589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | 662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
590 | enum ds_qualifier qual, struct task_struct *task, | 663 | enum ds_qualifier qual, struct task_struct *task, |
591 | void *base, size_t size, size_t th, unsigned int flags) | 664 | int cpu, void *base, size_t size, size_t th) |
592 | { | 665 | { |
593 | struct ds_context *context; | 666 | struct ds_context *context; |
594 | int error; | 667 | int error; |
668 | size_t req_size; | ||
669 | |||
670 | error = -EOPNOTSUPP; | ||
671 | if (!ds_cfg.sizeof_rec[qual]) | ||
672 | goto out; | ||
595 | 673 | ||
596 | error = -EINVAL; | 674 | error = -EINVAL; |
597 | if (!base) | 675 | if (!base) |
598 | goto out; | 676 | goto out; |
599 | 677 | ||
600 | /* we require some space to do alignment adjustments below */ | 678 | req_size = ds_cfg.sizeof_rec[qual]; |
679 | /* We might need space for alignment adjustments. */ | ||
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
681 | req_size += DS_ALIGNMENT; | ||
682 | |||
601 | error = -EINVAL; | 683 | error = -EINVAL; |
602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | 684 | if (size < req_size) |
603 | goto out; | 685 | goto out; |
604 | 686 | ||
605 | if (th != (size_t)-1) { | 687 | if (th != (size_t)-1) { |
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |||
614 | tracer->size = size; | 696 | tracer->size = size; |
615 | 697 | ||
616 | error = -ENOMEM; | 698 | error = -ENOMEM; |
617 | context = ds_get_context(task); | 699 | context = ds_get_context(task, cpu); |
618 | if (!context) | 700 | if (!context) |
619 | goto out; | 701 | goto out; |
620 | tracer->context = context; | 702 | tracer->context = context; |
621 | 703 | ||
622 | ds_init_ds_trace(trace, qual, base, size, th, flags); | 704 | /* |
705 | * Defer any tracer-specific initialization work for the context until | ||
706 | * context ownership has been clarified. | ||
707 | */ | ||
623 | 708 | ||
624 | error = 0; | 709 | error = 0; |
625 | out: | 710 | out: |
626 | return error; | 711 | return error; |
627 | } | 712 | } |
628 | 713 | ||
629 | struct bts_tracer *ds_request_bts(struct task_struct *task, | 714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, |
630 | void *base, size_t size, | 715 | void *base, size_t size, |
631 | bts_ovfl_callback_t ovfl, size_t th, | 716 | bts_ovfl_callback_t ovfl, size_t th, |
632 | unsigned int flags) | 717 | unsigned int flags) |
633 | { | 718 | { |
634 | struct bts_tracer *tracer; | 719 | struct bts_tracer *tracer; |
635 | unsigned long irq; | ||
636 | int error; | 720 | int error; |
637 | 721 | ||
722 | /* Buffer overflow notification is not yet implemented. */ | ||
638 | error = -EOPNOTSUPP; | 723 | error = -EOPNOTSUPP; |
639 | if (!ds_cfg.ctl[dsf_bts]) | 724 | if (ovfl) |
640 | goto out; | 725 | goto out; |
641 | 726 | ||
642 | /* buffer overflow notification is not yet implemented */ | 727 | error = get_tracer(task); |
643 | error = -EOPNOTSUPP; | 728 | if (error < 0) |
644 | if (ovfl) | ||
645 | goto out; | 729 | goto out; |
646 | 730 | ||
647 | error = -ENOMEM; | 731 | error = -ENOMEM; |
648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
649 | if (!tracer) | 733 | if (!tracer) |
650 | goto out; | 734 | goto out_put_tracer; |
651 | tracer->ovfl = ovfl; | 735 | tracer->ovfl = ovfl; |
652 | 736 | ||
737 | /* Do some more error checking and acquire a tracing context. */ | ||
653 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
654 | ds_bts, task, base, size, th, flags); | 739 | ds_bts, task, cpu, base, size, th); |
655 | if (error < 0) | 740 | if (error < 0) |
656 | goto out_tracer; | 741 | goto out_tracer; |
657 | 742 | ||
658 | 743 | /* Claim the bts part of the tracing context we acquired above. */ | |
659 | spin_lock_irqsave(&ds_lock, irq); | 744 | spin_lock_irq(&ds_lock); |
660 | |||
661 | error = -EPERM; | ||
662 | if (!check_tracer(task)) | ||
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
665 | 745 | ||
666 | error = -EPERM; | 746 | error = -EPERM; |
667 | if (tracer->ds.context->bts_master) | 747 | if (tracer->ds.context->bts_master) |
668 | goto out_put_tracer; | 748 | goto out_unlock; |
669 | tracer->ds.context->bts_master = tracer; | 749 | tracer->ds.context->bts_master = tracer; |
670 | 750 | ||
671 | spin_unlock_irqrestore(&ds_lock, irq); | 751 | spin_unlock_irq(&ds_lock); |
672 | 752 | ||
753 | /* | ||
754 | * Now that we own the bts part of the context, let's complete the | ||
755 | * initialization for that part. | ||
756 | */ | ||
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
759 | ds_install_ds_area(tracer->ds.context); | ||
673 | 760 | ||
674 | tracer->trace.read = bts_read; | 761 | tracer->trace.read = bts_read; |
675 | tracer->trace.write = bts_write; | 762 | tracer->trace.write = bts_write; |
676 | 763 | ||
677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | 764 | /* Start tracing. */ |
678 | ds_resume_bts(tracer); | 765 | ds_resume_bts(tracer); |
679 | 766 | ||
680 | return tracer; | 767 | return tracer; |
681 | 768 | ||
682 | out_put_tracer: | ||
683 | put_tracer(task); | ||
684 | out_unlock: | 769 | out_unlock: |
685 | spin_unlock_irqrestore(&ds_lock, irq); | 770 | spin_unlock_irq(&ds_lock); |
686 | ds_put_context(tracer->ds.context); | 771 | ds_put_context(tracer->ds.context); |
687 | out_tracer: | 772 | out_tracer: |
688 | kfree(tracer); | 773 | kfree(tracer); |
774 | out_put_tracer: | ||
775 | put_tracer(task); | ||
689 | out: | 776 | out: |
690 | return ERR_PTR(error); | 777 | return ERR_PTR(error); |
691 | } | 778 | } |
692 | 779 | ||
693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
694 | void *base, size_t size, | 781 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | 782 | bts_ovfl_callback_t ovfl, |
696 | unsigned int flags) | 783 | size_t th, unsigned int flags) |
784 | { | ||
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
786 | } | ||
787 | |||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
789 | bts_ovfl_callback_t ovfl, | ||
790 | size_t th, unsigned int flags) | ||
791 | { | ||
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
793 | } | ||
794 | |||
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
796 | void *base, size_t size, | ||
797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
798 | unsigned int flags) | ||
697 | { | 799 | { |
698 | struct pebs_tracer *tracer; | 800 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
700 | int error; | 801 | int error; |
701 | 802 | ||
702 | /* buffer overflow notification is not yet implemented */ | 803 | /* Buffer overflow notification is not yet implemented. */ |
703 | error = -EOPNOTSUPP; | 804 | error = -EOPNOTSUPP; |
704 | if (ovfl) | 805 | if (ovfl) |
705 | goto out; | 806 | goto out; |
706 | 807 | ||
808 | error = get_tracer(task); | ||
809 | if (error < 0) | ||
810 | goto out; | ||
811 | |||
707 | error = -ENOMEM; | 812 | error = -ENOMEM; |
708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
709 | if (!tracer) | 814 | if (!tracer) |
710 | goto out; | 815 | goto out_put_tracer; |
711 | tracer->ovfl = ovfl; | 816 | tracer->ovfl = ovfl; |
712 | 817 | ||
818 | /* Do some more error checking and acquire a tracing context. */ | ||
713 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
714 | ds_pebs, task, base, size, th, flags); | 820 | ds_pebs, task, cpu, base, size, th); |
715 | if (error < 0) | 821 | if (error < 0) |
716 | goto out_tracer; | 822 | goto out_tracer; |
717 | 823 | ||
718 | spin_lock_irqsave(&ds_lock, irq); | 824 | /* Claim the pebs part of the tracing context we acquired above. */ |
719 | 825 | spin_lock_irq(&ds_lock); | |
720 | error = -EPERM; | ||
721 | if (!check_tracer(task)) | ||
722 | goto out_unlock; | ||
723 | get_tracer(task); | ||
724 | 826 | ||
725 | error = -EPERM; | 827 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | 828 | if (tracer->ds.context->pebs_master) |
727 | goto out_put_tracer; | 829 | goto out_unlock; |
728 | tracer->ds.context->pebs_master = tracer; | 830 | tracer->ds.context->pebs_master = tracer; |
729 | 831 | ||
730 | spin_unlock_irqrestore(&ds_lock, irq); | 832 | spin_unlock_irq(&ds_lock); |
731 | 833 | ||
834 | /* | ||
835 | * Now that we own the pebs part of the context, let's complete the | ||
836 | * initialization for that part. | ||
837 | */ | ||
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
840 | ds_install_ds_area(tracer->ds.context); | ||
841 | |||
842 | /* Start tracing. */ | ||
733 | ds_resume_pebs(tracer); | 843 | ds_resume_pebs(tracer); |
734 | 844 | ||
735 | return tracer; | 845 | return tracer; |
736 | 846 | ||
737 | out_put_tracer: | ||
738 | put_tracer(task); | ||
739 | out_unlock: | 847 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | 848 | spin_unlock_irq(&ds_lock); |
741 | ds_put_context(tracer->ds.context); | 849 | ds_put_context(tracer->ds.context); |
742 | out_tracer: | 850 | out_tracer: |
743 | kfree(tracer); | 851 | kfree(tracer); |
852 | out_put_tracer: | ||
853 | put_tracer(task); | ||
744 | out: | 854 | out: |
745 | return ERR_PTR(error); | 855 | return ERR_PTR(error); |
746 | } | 856 | } |
747 | 857 | ||
748 | void ds_release_bts(struct bts_tracer *tracer) | 858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
859 | void *base, size_t size, | ||
860 | pebs_ovfl_callback_t ovfl, | ||
861 | size_t th, unsigned int flags) | ||
749 | { | 862 | { |
750 | if (!tracer) | 863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); |
751 | return; | 864 | } |
752 | 865 | ||
753 | ds_suspend_bts(tracer); | 866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, |
867 | pebs_ovfl_callback_t ovfl, | ||
868 | size_t th, unsigned int flags) | ||
869 | { | ||
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
871 | } | ||
872 | |||
873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
874 | { | ||
875 | struct task_struct *task; | ||
876 | |||
877 | task = tracer->ds.context->task; | ||
754 | 878 | ||
755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | 879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
756 | tracer->ds.context->bts_master = NULL; | 880 | tracer->ds.context->bts_master = NULL; |
757 | 881 | ||
758 | put_tracer(tracer->ds.context->task); | 882 | /* Make sure tracing stopped and the tracer is not in use. */ |
883 | if (task && (task != current)) | ||
884 | wait_task_context_switch(task); | ||
885 | |||
759 | ds_put_context(tracer->ds.context); | 886 | ds_put_context(tracer->ds.context); |
887 | put_tracer(task); | ||
760 | 888 | ||
761 | kfree(tracer); | 889 | kfree(tracer); |
762 | } | 890 | } |
763 | 891 | ||
892 | void ds_release_bts(struct bts_tracer *tracer) | ||
893 | { | ||
894 | might_sleep(); | ||
895 | |||
896 | if (!tracer) | ||
897 | return; | ||
898 | |||
899 | ds_suspend_bts(tracer); | ||
900 | ds_free_bts(tracer); | ||
901 | } | ||
902 | |||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
904 | { | ||
905 | struct task_struct *task; | ||
906 | unsigned long irq; | ||
907 | int error; | ||
908 | |||
909 | if (!tracer) | ||
910 | return 0; | ||
911 | |||
912 | task = tracer->ds.context->task; | ||
913 | |||
914 | local_irq_save(irq); | ||
915 | |||
916 | error = -EPERM; | ||
917 | if (!task && | ||
918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
919 | goto out; | ||
920 | |||
921 | error = -EPERM; | ||
922 | if (task && (task != current)) | ||
923 | goto out; | ||
924 | |||
925 | ds_suspend_bts_noirq(tracer); | ||
926 | ds_free_bts(tracer); | ||
927 | |||
928 | error = 0; | ||
929 | out: | ||
930 | local_irq_restore(irq); | ||
931 | return error; | ||
932 | } | ||
933 | |||
934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
935 | unsigned long debugctlmsr) | ||
936 | { | ||
937 | task->thread.debugctlmsr = debugctlmsr; | ||
938 | |||
939 | get_cpu(); | ||
940 | if (task == current) | ||
941 | update_debugctlmsr(debugctlmsr); | ||
942 | put_cpu(); | ||
943 | } | ||
944 | |||
764 | void ds_suspend_bts(struct bts_tracer *tracer) | 945 | void ds_suspend_bts(struct bts_tracer *tracer) |
765 | { | 946 | { |
766 | struct task_struct *task; | 947 | struct task_struct *task; |
948 | unsigned long debugctlmsr; | ||
949 | int cpu; | ||
767 | 950 | ||
768 | if (!tracer) | 951 | if (!tracer) |
769 | return; | 952 | return; |
770 | 953 | ||
954 | tracer->flags = 0; | ||
955 | |||
771 | task = tracer->ds.context->task; | 956 | task = tracer->ds.context->task; |
957 | cpu = tracer->ds.context->cpu; | ||
772 | 958 | ||
773 | if (!task || (task == current)) | 959 | WARN_ON(!task && irqs_disabled()); |
774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); | ||
775 | 960 | ||
776 | if (task) { | 961 | debugctlmsr = (task ? |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | 962 | task->thread.debugctlmsr : |
963 | get_debugctlmsr_on_cpu(cpu)); | ||
964 | debugctlmsr &= ~BTS_CONTROL; | ||
778 | 965 | ||
779 | if (!task->thread.debugctlmsr) | 966 | if (task) |
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | 967 | update_task_debugctlmsr(task, debugctlmsr); |
781 | } | 968 | else |
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
782 | } | 970 | } |
783 | 971 | ||
784 | void ds_resume_bts(struct bts_tracer *tracer) | 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) |
785 | { | 973 | { |
786 | struct task_struct *task; | 974 | struct task_struct *task; |
787 | unsigned long control; | 975 | unsigned long debugctlmsr, irq; |
976 | int cpu, error = 0; | ||
788 | 977 | ||
789 | if (!tracer) | 978 | if (!tracer) |
790 | return; | 979 | return 0; |
980 | |||
981 | tracer->flags = 0; | ||
791 | 982 | ||
792 | task = tracer->ds.context->task; | 983 | task = tracer->ds.context->task; |
984 | cpu = tracer->ds.context->cpu; | ||
985 | |||
986 | local_irq_save(irq); | ||
987 | |||
988 | error = -EPERM; | ||
989 | if (!task && (cpu != smp_processor_id())) | ||
990 | goto out; | ||
991 | |||
992 | debugctlmsr = (task ? | ||
993 | task->thread.debugctlmsr : | ||
994 | get_debugctlmsr()); | ||
995 | debugctlmsr &= ~BTS_CONTROL; | ||
996 | |||
997 | if (task) | ||
998 | update_task_debugctlmsr(task, debugctlmsr); | ||
999 | else | ||
1000 | update_debugctlmsr(debugctlmsr); | ||
1001 | |||
1002 | error = 0; | ||
1003 | out: | ||
1004 | local_irq_restore(irq); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
1009 | { | ||
1010 | unsigned long control; | ||
793 | 1011 | ||
794 | control = ds_cfg.ctl[dsf_bts]; | 1012 | control = ds_cfg.ctl[dsf_bts]; |
795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | 1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer) | |||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | 1015 | if (!(tracer->trace.ds.flags & BTS_USER)) |
798 | control |= ds_cfg.ctl[dsf_bts_user]; | 1016 | control |= ds_cfg.ctl[dsf_bts_user]; |
799 | 1017 | ||
800 | if (task) { | 1018 | return control; |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
804 | |||
805 | if (!task || (task == current)) | ||
806 | update_debugctlmsr(get_debugctlmsr() | control); | ||
807 | } | 1019 | } |
808 | 1020 | ||
809 | void ds_release_pebs(struct pebs_tracer *tracer) | 1021 | void ds_resume_bts(struct bts_tracer *tracer) |
810 | { | 1022 | { |
1023 | struct task_struct *task; | ||
1024 | unsigned long debugctlmsr; | ||
1025 | int cpu; | ||
1026 | |||
811 | if (!tracer) | 1027 | if (!tracer) |
812 | return; | 1028 | return; |
813 | 1029 | ||
814 | ds_suspend_pebs(tracer); | 1030 | tracer->flags = tracer->trace.ds.flags; |
1031 | |||
1032 | task = tracer->ds.context->task; | ||
1033 | cpu = tracer->ds.context->cpu; | ||
1034 | |||
1035 | WARN_ON(!task && irqs_disabled()); | ||
1036 | |||
1037 | debugctlmsr = (task ? | ||
1038 | task->thread.debugctlmsr : | ||
1039 | get_debugctlmsr_on_cpu(cpu)); | ||
1040 | debugctlmsr |= ds_bts_control(tracer); | ||
1041 | |||
1042 | if (task) | ||
1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
1044 | else | ||
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
1046 | } | ||
1047 | |||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
1049 | { | ||
1050 | struct task_struct *task; | ||
1051 | unsigned long debugctlmsr, irq; | ||
1052 | int cpu, error = 0; | ||
1053 | |||
1054 | if (!tracer) | ||
1055 | return 0; | ||
1056 | |||
1057 | tracer->flags = tracer->trace.ds.flags; | ||
1058 | |||
1059 | task = tracer->ds.context->task; | ||
1060 | cpu = tracer->ds.context->cpu; | ||
1061 | |||
1062 | local_irq_save(irq); | ||
1063 | |||
1064 | error = -EPERM; | ||
1065 | if (!task && (cpu != smp_processor_id())) | ||
1066 | goto out; | ||
1067 | |||
1068 | debugctlmsr = (task ? | ||
1069 | task->thread.debugctlmsr : | ||
1070 | get_debugctlmsr()); | ||
1071 | debugctlmsr |= ds_bts_control(tracer); | ||
1072 | |||
1073 | if (task) | ||
1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
1075 | else | ||
1076 | update_debugctlmsr(debugctlmsr); | ||
1077 | |||
1078 | error = 0; | ||
1079 | out: | ||
1080 | local_irq_restore(irq); | ||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
1085 | { | ||
1086 | struct task_struct *task; | ||
1087 | |||
1088 | task = tracer->ds.context->task; | ||
815 | 1089 | ||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | 1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
817 | tracer->ds.context->pebs_master = NULL; | 1091 | tracer->ds.context->pebs_master = NULL; |
818 | 1092 | ||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | 1093 | ds_put_context(tracer->ds.context); |
1094 | put_tracer(task); | ||
821 | 1095 | ||
822 | kfree(tracer); | 1096 | kfree(tracer); |
823 | } | 1097 | } |
824 | 1098 | ||
1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
1100 | { | ||
1101 | might_sleep(); | ||
1102 | |||
1103 | if (!tracer) | ||
1104 | return; | ||
1105 | |||
1106 | ds_suspend_pebs(tracer); | ||
1107 | ds_free_pebs(tracer); | ||
1108 | } | ||
1109 | |||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
1111 | { | ||
1112 | struct task_struct *task; | ||
1113 | unsigned long irq; | ||
1114 | int error; | ||
1115 | |||
1116 | if (!tracer) | ||
1117 | return 0; | ||
1118 | |||
1119 | task = tracer->ds.context->task; | ||
1120 | |||
1121 | local_irq_save(irq); | ||
1122 | |||
1123 | error = -EPERM; | ||
1124 | if (!task && | ||
1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
1126 | goto out; | ||
1127 | |||
1128 | error = -EPERM; | ||
1129 | if (task && (task != current)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ds_suspend_pebs_noirq(tracer); | ||
1133 | ds_free_pebs(tracer); | ||
1134 | |||
1135 | error = 0; | ||
1136 | out: | ||
1137 | local_irq_restore(irq); | ||
1138 | return error; | ||
1139 | } | ||
1140 | |||
825 | void ds_suspend_pebs(struct pebs_tracer *tracer) | 1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
826 | { | 1142 | { |
827 | 1143 | ||
828 | } | 1144 | } |
829 | 1145 | ||
1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
1147 | { | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
830 | void ds_resume_pebs(struct pebs_tracer *tracer) | 1151 | void ds_resume_pebs(struct pebs_tracer *tracer) |
831 | { | 1152 | { |
832 | 1153 | ||
833 | } | 1154 | } |
834 | 1155 | ||
1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
1157 | { | ||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
836 | { | 1162 | { |
837 | if (!tracer) | 1163 | if (!tracer) |
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | |||
847 | return NULL; | 1173 | return NULL; |
848 | 1174 | ||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
850 | tracer->trace.reset_value = | 1176 | |
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | 1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; |
1178 | memcpy(tracer->trace.counter_reset, | ||
1179 | tracer->ds.context->ds + | ||
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
852 | 1182 | ||
853 | return &tracer->trace; | 1183 | return &tracer->trace; |
854 | } | 1184 | } |
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer) | |||
873 | 1203 | ||
874 | tracer->trace.ds.top = tracer->trace.ds.begin; | 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; |
875 | 1205 | ||
876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | 1207 | (unsigned long)tracer->trace.ds.top); |
878 | 1208 | ||
879 | return 0; | 1209 | return 0; |
880 | } | 1210 | } |
881 | 1211 | ||
882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) | 1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, |
1213 | unsigned int counter, u64 value) | ||
883 | { | 1214 | { |
884 | if (!tracer) | 1215 | if (!tracer) |
885 | return -EINVAL; | 1216 | return -EINVAL; |
886 | 1217 | ||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | 1218 | if (ds_cfg.nr_counter_reset < counter) |
1219 | return -EINVAL; | ||
1220 | |||
1221 | *(u64 *)(tracer->ds.context->ds + | ||
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
888 | 1224 | ||
889 | return 0; | 1225 | return 0; |
890 | } | 1226 | } |
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = { | |||
894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | 1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
895 | .ctl[dsf_bts_kernel] = (1 << 5), | 1231 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | 1232 | .ctl[dsf_bts_user] = (1 << 6), |
897 | 1233 | .nr_counter_reset = 1, | |
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
900 | #ifdef __i386__ | ||
901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
902 | #else | ||
903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
904 | #endif | ||
905 | }; | 1234 | }; |
906 | static const struct ds_configuration ds_cfg_pentium_m = { | 1235 | static const struct ds_configuration ds_cfg_pentium_m = { |
907 | .name = "Pentium M", | 1236 | .name = "Pentium M", |
908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
909 | 1238 | .nr_counter_reset = 1, | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
912 | #ifdef __i386__ | ||
913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
914 | #else | ||
915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
916 | #endif | ||
917 | }; | 1239 | }; |
918 | static const struct ds_configuration ds_cfg_core2_atom = { | 1240 | static const struct ds_configuration ds_cfg_core2_atom = { |
919 | .name = "Core 2/Atom", | 1241 | .name = "Core 2/Atom", |
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
921 | .ctl[dsf_bts_kernel] = (1 << 9), | 1243 | .ctl[dsf_bts_kernel] = (1 << 9), |
922 | .ctl[dsf_bts_user] = (1 << 10), | 1244 | .ctl[dsf_bts_user] = (1 << 10), |
923 | 1245 | .nr_counter_reset = 1, | |
924 | .sizeof_field = 8, | 1246 | }; |
925 | .sizeof_rec[ds_bts] = 8 * 3, | 1247 | static const struct ds_configuration ds_cfg_core_i7 = { |
926 | .sizeof_rec[ds_pebs] = 8 * 18, | 1248 | .name = "Core i7", |
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
1252 | .nr_counter_reset = 4, | ||
927 | }; | 1253 | }; |
928 | 1254 | ||
929 | static void | 1255 | static void |
930 | ds_configure(const struct ds_configuration *cfg) | 1256 | ds_configure(const struct ds_configuration *cfg, |
1257 | struct cpuinfo_x86 *cpu) | ||
931 | { | 1258 | { |
1259 | unsigned long nr_pebs_fields = 0; | ||
1260 | |||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
1262 | |||
1263 | #ifdef __i386__ | ||
1264 | nr_pebs_fields = 10; | ||
1265 | #else | ||
1266 | nr_pebs_fields = 18; | ||
1267 | #endif | ||
1268 | |||
1269 | /* | ||
1270 | * Starting with version 2, architectural performance | ||
1271 | * monitoring supports a format specifier. | ||
1272 | */ | ||
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
1274 | unsigned long perf_capabilities, format; | ||
1275 | |||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
1277 | |||
1278 | format = (perf_capabilities >> 8) & 0xf; | ||
1279 | |||
1280 | switch (format) { | ||
1281 | case 0: | ||
1282 | nr_pebs_fields = 18; | ||
1283 | break; | ||
1284 | case 1: | ||
1285 | nr_pebs_fields = 22; | ||
1286 | break; | ||
1287 | default: | ||
1288 | printk(KERN_INFO | ||
1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
1290 | nr_pebs_fields = 0; | ||
1291 | break; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
933 | ds_cfg = *cfg; | 1296 | ds_cfg = *cfg; |
934 | 1297 | ||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | 1298 | ds_cfg.sizeof_ptr_field = |
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
1300 | |||
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
936 | 1303 | ||
937 | if (!cpu_has_bts) { | 1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { |
938 | ds_cfg.ctl[dsf_bts] = 0; | 1305 | ds_cfg.sizeof_rec[ds_bts] = 0; |
939 | printk(KERN_INFO "[ds] bts not available\n"); | 1306 | printk(KERN_INFO "[ds] bts not available\n"); |
940 | } | 1307 | } |
941 | if (!cpu_has_pebs) | 1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { |
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | 1310 | printk(KERN_INFO "[ds] pebs not available\n"); |
1311 | } | ||
1312 | |||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
943 | 1317 | ||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); |
945 | } | 1319 | } |
946 | 1320 | ||
947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
948 | { | 1322 | { |
1323 | /* Only configure the first cpu. Others are identical. */ | ||
1324 | if (ds_cfg.name) | ||
1325 | return; | ||
1326 | |||
949 | switch (c->x86) { | 1327 | switch (c->x86) { |
950 | case 0x6: | 1328 | case 0x6: |
951 | switch (c->x86_model) { | 1329 | switch (c->x86_model) { |
952 | case 0x9: | 1330 | case 0x9: |
953 | case 0xd: /* Pentium M */ | 1331 | case 0xd: /* Pentium M */ |
954 | ds_configure(&ds_cfg_pentium_m); | 1332 | ds_configure(&ds_cfg_pentium_m, c); |
955 | break; | 1333 | break; |
956 | case 0xf: | 1334 | case 0xf: |
957 | case 0x17: /* Core2 */ | 1335 | case 0x17: /* Core2 */ |
958 | case 0x1c: /* Atom */ | 1336 | case 0x1c: /* Atom */ |
959 | ds_configure(&ds_cfg_core2_atom); | 1337 | ds_configure(&ds_cfg_core2_atom, c); |
1338 | break; | ||
1339 | case 0x1a: /* Core i7 */ | ||
1340 | ds_configure(&ds_cfg_core_i7, c); | ||
960 | break; | 1341 | break; |
961 | case 0x1a: /* i7 */ | ||
962 | default: | 1342 | default: |
963 | /* sorry, don't know about them */ | 1343 | /* Sorry, don't know about them. */ |
964 | break; | 1344 | break; |
965 | } | 1345 | } |
966 | break; | 1346 | break; |
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
969 | case 0x0: | 1349 | case 0x0: |
970 | case 0x1: | 1350 | case 0x1: |
971 | case 0x2: /* Netburst */ | 1351 | case 0x2: /* Netburst */ |
972 | ds_configure(&ds_cfg_netburst); | 1352 | ds_configure(&ds_cfg_netburst, c); |
973 | break; | 1353 | break; |
974 | default: | 1354 | default: |
975 | /* sorry, don't know about them */ | 1355 | /* Sorry, don't know about them. */ |
976 | break; | 1356 | break; |
977 | } | 1357 | } |
978 | break; | 1358 | break; |
979 | default: | 1359 | default: |
980 | /* sorry, don't know about them */ | 1360 | /* Sorry, don't know about them. */ |
981 | break; | 1361 | break; |
982 | } | 1362 | } |
983 | } | 1363 | } |
984 | 1364 | ||
1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
1366 | enum bts_qualifier qualifier, | ||
1367 | struct task_struct *task) | ||
1368 | { | ||
1369 | struct bts_tracer *tracer = context->bts_master; | ||
1370 | struct bts_struct ts; | ||
1371 | |||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
1373 | barrier(); | ||
1374 | |||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
1376 | return; | ||
1377 | |||
1378 | memset(&ts, 0, sizeof(ts)); | ||
1379 | ts.qualifier = qualifier; | ||
1380 | ts.variant.event.clock = trace_clock_global(); | ||
1381 | ts.variant.event.pid = task->pid; | ||
1382 | |||
1383 | bts_write(tracer, &ts); | ||
1384 | } | ||
1385 | |||
985 | /* | 1386 | /* |
986 | * Change the DS configuration from tracing prev to tracing next. | 1387 | * Change the DS configuration from tracing prev to tracing next. |
987 | */ | 1388 | */ |
988 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | 1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) |
989 | { | 1390 | { |
990 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | 1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
991 | struct ds_context *next_ctx = next->thread.ds_ctx; | 1392 | struct ds_context *next_ctx = next->thread.ds_ctx; |
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
1394 | |||
1395 | /* Make sure all data is read before we start. */ | ||
1396 | barrier(); | ||
992 | 1397 | ||
993 | if (prev_ctx) { | 1398 | if (prev_ctx) { |
994 | update_debugctlmsr(0); | 1399 | update_debugctlmsr(0); |
995 | 1400 | ||
996 | if (prev_ctx->bts_master && | 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); |
997 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
998 | struct bts_struct ts = { | ||
999 | .qualifier = bts_task_departs, | ||
1000 | .variant.timestamp.jiffies = jiffies_64, | ||
1001 | .variant.timestamp.pid = prev->pid | ||
1002 | }; | ||
1003 | bts_write(prev_ctx->bts_master, &ts); | ||
1004 | } | ||
1005 | } | 1402 | } |
1006 | 1403 | ||
1007 | if (next_ctx) { | 1404 | if (next_ctx) { |
1008 | if (next_ctx->bts_master && | 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); |
1009 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1010 | struct bts_struct ts = { | ||
1011 | .qualifier = bts_task_arrives, | ||
1012 | .variant.timestamp.jiffies = jiffies_64, | ||
1013 | .variant.timestamp.pid = next->pid | ||
1014 | }; | ||
1015 | bts_write(next_ctx->bts_master, &ts); | ||
1016 | } | ||
1017 | 1406 | ||
1018 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | 1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); |
1019 | } | 1408 | } |
1020 | 1409 | ||
1021 | update_debugctlmsr(next->thread.debugctlmsr); | 1410 | update_debugctlmsr(debugctlmsr); |
1022 | } | 1411 | } |
1023 | 1412 | ||
1024 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | 1413 | static __init int ds_selftest(void) |
1025 | { | 1414 | { |
1026 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | 1415 | if (ds_cfg.sizeof_rec[ds_bts]) { |
1027 | tsk->thread.ds_ctx = NULL; | 1416 | int error; |
1028 | } | ||
1029 | 1417 | ||
1030 | void ds_exit_thread(struct task_struct *tsk) | 1418 | error = ds_selftest_bts(); |
1031 | { | 1419 | if (error) { |
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
1426 | int error; | ||
1427 | |||
1428 | error = ds_selftest_pebs(); | ||
1429 | if (error) { | ||
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | return 0; | ||
1032 | } | 1436 | } |
1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c new file mode 100644 index 000000000000..6bc7c199ab99 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #include "ds_selftest.h" | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpu.h> | ||
15 | |||
16 | #include <asm/ds.h> | ||
17 | |||
18 | |||
19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
21 | |||
22 | struct ds_selftest_bts_conf { | ||
23 | struct bts_tracer *tracer; | ||
24 | int error; | ||
25 | int (*suspend)(struct bts_tracer *); | ||
26 | int (*resume)(struct bts_tracer *); | ||
27 | }; | ||
28 | |||
29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
30 | { | ||
31 | int error = 0; | ||
32 | |||
33 | if (!trace) { | ||
34 | printk(KERN_CONT "failed to access trace..."); | ||
35 | /* Bail out. Other tests are pointless. */ | ||
36 | return -1; | ||
37 | } | ||
38 | |||
39 | if (!trace->read) { | ||
40 | printk(KERN_CONT "bts read not available..."); | ||
41 | error = -1; | ||
42 | } | ||
43 | |||
44 | /* Do some sanity checks on the trace configuration. */ | ||
45 | if (!trace->ds.n) { | ||
46 | printk(KERN_CONT "empty bts buffer..."); | ||
47 | error = -1; | ||
48 | } | ||
49 | if (!trace->ds.size) { | ||
50 | printk(KERN_CONT "bad bts trace setup..."); | ||
51 | error = -1; | ||
52 | } | ||
53 | if (trace->ds.end != | ||
54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
56 | error = -1; | ||
57 | } | ||
58 | /* | ||
59 | * We allow top in [begin; end], since it's not clear when the | ||
60 | * overflow adjustment happens: after the increment or before the | ||
61 | * write. | ||
62 | */ | ||
63 | if ((trace->ds.top < trace->ds.begin) || | ||
64 | (trace->ds.end < trace->ds.top)) { | ||
65 | printk(KERN_CONT "bts top out of bounds..."); | ||
66 | error = -1; | ||
67 | } | ||
68 | |||
69 | return error; | ||
70 | } | ||
71 | |||
72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
73 | const struct bts_trace *trace, | ||
74 | const void *from, const void *to) | ||
75 | { | ||
76 | const unsigned char *at; | ||
77 | |||
78 | /* | ||
79 | * Check a few things which do not belong to this test. | ||
80 | * They should be covered by other tests. | ||
81 | */ | ||
82 | if (!trace) | ||
83 | return -1; | ||
84 | |||
85 | if (!trace->read) | ||
86 | return -1; | ||
87 | |||
88 | if (to < from) | ||
89 | return -1; | ||
90 | |||
91 | if (from < trace->ds.begin) | ||
92 | return -1; | ||
93 | |||
94 | if (trace->ds.end < to) | ||
95 | return -1; | ||
96 | |||
97 | if (!trace->ds.size) | ||
98 | return -1; | ||
99 | |||
100 | /* Now to the test itself. */ | ||
101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
102 | struct bts_struct bts; | ||
103 | unsigned long index; | ||
104 | int error; | ||
105 | |||
106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
107 | printk(KERN_CONT | ||
108 | "read from non-integer index..."); | ||
109 | return -1; | ||
110 | } | ||
111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
112 | |||
113 | memset(&bts, 0, sizeof(bts)); | ||
114 | error = trace->read(tracer, at, &bts); | ||
115 | if (error < 0) { | ||
116 | printk(KERN_CONT | ||
117 | "error reading bts trace at [%lu] (0x%p)...", | ||
118 | index, at); | ||
119 | return error; | ||
120 | } | ||
121 | |||
122 | switch (bts.qualifier) { | ||
123 | case BTS_BRANCH: | ||
124 | break; | ||
125 | default: | ||
126 | printk(KERN_CONT | ||
127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
128 | bts.qualifier, index, at); | ||
129 | return -1; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static void ds_selftest_bts_cpu(void *arg) | ||
137 | { | ||
138 | struct ds_selftest_bts_conf *conf = arg; | ||
139 | const struct bts_trace *trace; | ||
140 | void *top; | ||
141 | |||
142 | if (IS_ERR(conf->tracer)) { | ||
143 | conf->error = PTR_ERR(conf->tracer); | ||
144 | conf->tracer = NULL; | ||
145 | |||
146 | printk(KERN_CONT | ||
147 | "initialization failed (err: %d)...", conf->error); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | /* We should meanwhile have enough trace. */ | ||
152 | conf->error = conf->suspend(conf->tracer); | ||
153 | if (conf->error < 0) | ||
154 | return; | ||
155 | |||
156 | /* Let's see if we can access the trace. */ | ||
157 | trace = ds_read_bts(conf->tracer); | ||
158 | |||
159 | conf->error = ds_selftest_bts_consistency(trace); | ||
160 | if (conf->error < 0) | ||
161 | return; | ||
162 | |||
163 | /* If everything went well, we should have a few trace entries. */ | ||
164 | if (trace->ds.top == trace->ds.begin) { | ||
165 | /* | ||
166 | * It is possible but highly unlikely that we got a | ||
167 | * buffer overflow and end up at exactly the same | ||
168 | * position we started from. | ||
169 | * Let's issue a warning, but continue. | ||
170 | */ | ||
171 | printk(KERN_CONT "no trace/overflow..."); | ||
172 | } | ||
173 | |||
174 | /* Let's try to read the trace we collected. */ | ||
175 | conf->error = | ||
176 | ds_selftest_bts_read(conf->tracer, trace, | ||
177 | trace->ds.begin, trace->ds.top); | ||
178 | if (conf->error < 0) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Let's read the trace again. | ||
183 | * Since we suspended tracing, we should get the same result. | ||
184 | */ | ||
185 | top = trace->ds.top; | ||
186 | |||
187 | trace = ds_read_bts(conf->tracer); | ||
188 | conf->error = ds_selftest_bts_consistency(trace); | ||
189 | if (conf->error < 0) | ||
190 | return; | ||
191 | |||
192 | if (top != trace->ds.top) { | ||
193 | printk(KERN_CONT "suspend not working..."); | ||
194 | conf->error = -1; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* Let's collect some more trace - see if resume is working. */ | ||
199 | conf->error = conf->resume(conf->tracer); | ||
200 | if (conf->error < 0) | ||
201 | return; | ||
202 | |||
203 | conf->error = conf->suspend(conf->tracer); | ||
204 | if (conf->error < 0) | ||
205 | return; | ||
206 | |||
207 | trace = ds_read_bts(conf->tracer); | ||
208 | |||
209 | conf->error = ds_selftest_bts_consistency(trace); | ||
210 | if (conf->error < 0) | ||
211 | return; | ||
212 | |||
213 | if (trace->ds.top == top) { | ||
214 | /* | ||
215 | * It is possible but highly unlikely that we got a | ||
216 | * buffer overflow and end up at exactly the same | ||
217 | * position we started from. | ||
218 | * Let's issue a warning and check the full trace. | ||
219 | */ | ||
220 | printk(KERN_CONT | ||
221 | "no resume progress/overflow..."); | ||
222 | |||
223 | conf->error = | ||
224 | ds_selftest_bts_read(conf->tracer, trace, | ||
225 | trace->ds.begin, trace->ds.end); | ||
226 | } else if (trace->ds.top < top) { | ||
227 | /* | ||
228 | * We had a buffer overflow - the entire buffer should | ||
229 | * contain trace records. | ||
230 | */ | ||
231 | conf->error = | ||
232 | ds_selftest_bts_read(conf->tracer, trace, | ||
233 | trace->ds.begin, trace->ds.end); | ||
234 | } else { | ||
235 | /* | ||
236 | * It is quite likely that the buffer did not overflow. | ||
237 | * Let's just check the delta trace. | ||
238 | */ | ||
239 | conf->error = | ||
240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
241 | trace->ds.top); | ||
242 | } | ||
243 | if (conf->error < 0) | ||
244 | return; | ||
245 | |||
246 | conf->error = 0; | ||
247 | } | ||
248 | |||
249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
250 | { | ||
251 | ds_suspend_bts(tracer); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
256 | { | ||
257 | ds_resume_bts(tracer); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
262 | { | ||
263 | (void)ds_release_bts_noirq(tracer); | ||
264 | } | ||
265 | |||
266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
267 | struct bts_tracer *tracer) | ||
268 | { | ||
269 | int error = -EPERM; | ||
270 | |||
271 | /* Try to release the tracer on the wrong cpu. */ | ||
272 | get_cpu(); | ||
273 | if (cpu != smp_processor_id()) { | ||
274 | error = ds_release_bts_noirq(tracer); | ||
275 | if (error != -EPERM) | ||
276 | printk(KERN_CONT "release on wrong cpu..."); | ||
277 | } | ||
278 | put_cpu(); | ||
279 | |||
280 | return error ? 0 : -1; | ||
281 | } | ||
282 | |||
283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
284 | { | ||
285 | struct bts_tracer *tracer; | ||
286 | int error; | ||
287 | |||
288 | /* Try to request cpu tracing while task tracing is active. */ | ||
289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
290 | (size_t)-1, BTS_KERNEL); | ||
291 | error = PTR_ERR(tracer); | ||
292 | if (!IS_ERR(tracer)) { | ||
293 | ds_release_bts(tracer); | ||
294 | error = 0; | ||
295 | } | ||
296 | |||
297 | if (error != -EPERM) | ||
298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
299 | |||
300 | return error ? 0 : -1; | ||
301 | } | ||
302 | |||
303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
304 | { | ||
305 | struct bts_tracer *tracer; | ||
306 | int error; | ||
307 | |||
308 | /* Try to request task tracing while cpu tracing is active. */ | ||
309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
310 | (size_t)-1, BTS_KERNEL); | ||
311 | error = PTR_ERR(tracer); | ||
312 | if (!IS_ERR(tracer)) { | ||
313 | error = 0; | ||
314 | ds_release_bts(tracer); | ||
315 | } | ||
316 | |||
317 | if (error != -EPERM) | ||
318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
319 | |||
320 | return error ? 0 : -1; | ||
321 | } | ||
322 | |||
323 | int ds_selftest_bts(void) | ||
324 | { | ||
325 | struct ds_selftest_bts_conf conf; | ||
326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
327 | unsigned long irq; | ||
328 | int cpu; | ||
329 | |||
330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
331 | conf.error = 0; | ||
332 | |||
333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
334 | |||
335 | get_online_cpus(); | ||
336 | for_each_online_cpu(cpu) { | ||
337 | conf.suspend = ds_suspend_bts_wrap; | ||
338 | conf.resume = ds_resume_bts_wrap; | ||
339 | conf.tracer = | ||
340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
341 | NULL, (size_t)-1, BTS_KERNEL); | ||
342 | ds_selftest_bts_cpu(&conf); | ||
343 | if (conf.error >= 0) | ||
344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
345 | ds_release_bts(conf.tracer); | ||
346 | if (conf.error < 0) | ||
347 | goto out; | ||
348 | |||
349 | conf.suspend = ds_suspend_bts_noirq; | ||
350 | conf.resume = ds_resume_bts_noirq; | ||
351 | conf.tracer = | ||
352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
353 | NULL, (size_t)-1, BTS_KERNEL); | ||
354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
355 | if (conf.error >= 0) { | ||
356 | conf.error = | ||
357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
358 | conf.tracer); | ||
359 | /* We must not release the tracer twice. */ | ||
360 | if (conf.error < 0) | ||
361 | conf.tracer = NULL; | ||
362 | } | ||
363 | if (conf.error >= 0) | ||
364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
366 | conf.tracer, 1); | ||
367 | if (conf.error < 0) | ||
368 | goto out; | ||
369 | } | ||
370 | |||
371 | conf.suspend = ds_suspend_bts_wrap; | ||
372 | conf.resume = ds_resume_bts_wrap; | ||
373 | conf.tracer = | ||
374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
375 | NULL, (size_t)-1, BTS_KERNEL); | ||
376 | ds_selftest_bts_cpu(&conf); | ||
377 | if (conf.error >= 0) | ||
378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
379 | ds_release_bts(conf.tracer); | ||
380 | if (conf.error < 0) | ||
381 | goto out; | ||
382 | |||
383 | conf.suspend = ds_suspend_bts_noirq; | ||
384 | conf.resume = ds_resume_bts_noirq; | ||
385 | conf.tracer = | ||
386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
387 | NULL, (size_t)-1, BTS_KERNEL); | ||
388 | local_irq_save(irq); | ||
389 | ds_selftest_bts_cpu(&conf); | ||
390 | if (conf.error >= 0) | ||
391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
392 | ds_release_bts_noirq(conf.tracer); | ||
393 | local_irq_restore(irq); | ||
394 | if (conf.error < 0) | ||
395 | goto out; | ||
396 | |||
397 | conf.error = 0; | ||
398 | out: | ||
399 | put_online_cpus(); | ||
400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
401 | |||
402 | return conf.error; | ||
403 | } | ||
404 | |||
405 | int ds_selftest_pebs(void) | ||
406 | { | ||
407 | return 0; | ||
408 | } | ||
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h new file mode 100644 index 000000000000..2ba8745c6663 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.h | |||
@@ -0,0 +1,15 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
10 | extern int ds_selftest_bts(void); | ||
11 | extern int ds_selftest_pebs(void); | ||
12 | #else | ||
13 | static inline int ds_selftest_bts(void) { return 0; } | ||
14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
15 | #endif | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca989158e847..fb5dfb891f0f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/idle.h> | 14 | #include <asm/idle.h> |
15 | #include <asm/uaccess.h> | 15 | #include <asm/uaccess.h> |
16 | #include <asm/i387.h> | 16 | #include <asm/i387.h> |
17 | #include <asm/ds.h> | ||
17 | 18 | ||
18 | unsigned long idle_halt; | 19 | unsigned long idle_halt; |
19 | EXPORT_SYMBOL(idle_halt); | 20 | EXPORT_SYMBOL(idle_halt); |
@@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk) | |||
45 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | 46 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); |
46 | tsk->thread.xstate = NULL; | 47 | tsk->thread.xstate = NULL; |
47 | } | 48 | } |
49 | |||
50 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
48 | } | 51 | } |
49 | 52 | ||
50 | void free_thread_info(struct thread_info *ti) | 53 | void free_thread_info(struct thread_info *ti) |
@@ -83,8 +86,6 @@ void exit_thread(void) | |||
83 | put_cpu(); | 86 | put_cpu(); |
84 | kfree(bp); | 87 | kfree(bp); |
85 | } | 88 | } |
86 | |||
87 | ds_exit_thread(current); | ||
88 | } | 89 | } |
89 | 90 | ||
90 | void flush_thread(void) | 91 | void flush_thread(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84043a2..b5e4bfef4472 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
290 | p->thread.io_bitmap_max = 0; | 290 | p->thread.io_bitmap_max = 0; |
291 | } | 291 | } |
292 | 292 | ||
293 | ds_copy_thread(p, current); | 293 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
294 | p->thread.ds_ctx = NULL; | ||
294 | 295 | ||
295 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 296 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
296 | p->thread.debugctlmsr = 0; | 297 | p->thread.debugctlmsr = 0; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41392b1..5a1a1de292ec 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
335 | goto out; | 335 | goto out; |
336 | } | 336 | } |
337 | 337 | ||
338 | ds_copy_thread(p, me); | 338 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
339 | p->thread.ds_ctx = NULL; | ||
339 | 340 | ||
340 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 341 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
341 | p->thread.debugctlmsr = 0; | 342 | p->thread.debugctlmsr = 0; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 23b7c8f017e2..09ecbde91c13 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target, | |||
578 | } | 579 | } |
579 | 580 | ||
580 | #ifdef CONFIG_X86_PTRACE_BTS | 581 | #ifdef CONFIG_X86_PTRACE_BTS |
582 | /* | ||
583 | * A branch trace store context. | ||
584 | * | ||
585 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
586 | * ptraced tasks. | ||
587 | * | ||
588 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
589 | * The actual destruction work requires interrupts enabled, so the | ||
590 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
591 | * | ||
592 | * Contexts hold an additional task_struct reference on the traced | ||
593 | * task, as well as a reference on the tracer's mm. | ||
594 | * | ||
595 | * Ptrace already holds a task_struct reference for the duration of ptrace | ||
596 | * operations, but since destruction is deferred, it may be executed after | ||
597 | * both tracer and tracee have exited. | ||
598 | */ | ||
599 | struct bts_context { | ||
600 | /* The branch trace handle. */ | ||
601 | struct bts_tracer *tracer; | ||
602 | |||
603 | /* The buffer used to store the branch trace and its size. */ | ||
604 | void *buffer; | ||
605 | unsigned int size; | ||
606 | |||
607 | /* The mm that paid for the above buffer. */ | ||
608 | struct mm_struct *mm; | ||
609 | |||
610 | /* The task this context belongs to. */ | ||
611 | struct task_struct *task; | ||
612 | |||
613 | /* The signal to send on a bts buffer overflow. */ | ||
614 | unsigned int bts_ovfl_signal; | ||
615 | |||
616 | /* The work struct to destroy a context. */ | ||
617 | struct work_struct work; | ||
618 | }; | ||
619 | |||
620 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
621 | { | ||
622 | void *buffer = NULL; | ||
623 | int err = -ENOMEM; | ||
624 | |||
625 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
626 | if (err < 0) | ||
627 | return err; | ||
628 | |||
629 | buffer = kzalloc(size, GFP_KERNEL); | ||
630 | if (!buffer) | ||
631 | goto out_refund; | ||
632 | |||
633 | context->buffer = buffer; | ||
634 | context->size = size; | ||
635 | context->mm = get_task_mm(current); | ||
636 | |||
637 | return 0; | ||
638 | |||
639 | out_refund: | ||
640 | refund_locked_memory(current->mm, size); | ||
641 | return err; | ||
642 | } | ||
643 | |||
644 | static inline void free_bts_buffer(struct bts_context *context) | ||
645 | { | ||
646 | if (!context->buffer) | ||
647 | return; | ||
648 | |||
649 | kfree(context->buffer); | ||
650 | context->buffer = NULL; | ||
651 | |||
652 | refund_locked_memory(context->mm, context->size); | ||
653 | context->size = 0; | ||
654 | |||
655 | mmput(context->mm); | ||
656 | context->mm = NULL; | ||
657 | } | ||
658 | |||
659 | static void free_bts_context_work(struct work_struct *w) | ||
660 | { | ||
661 | struct bts_context *context; | ||
662 | |||
663 | context = container_of(w, struct bts_context, work); | ||
664 | |||
665 | ds_release_bts(context->tracer); | ||
666 | put_task_struct(context->task); | ||
667 | free_bts_buffer(context); | ||
668 | kfree(context); | ||
669 | } | ||
670 | |||
671 | static inline void free_bts_context(struct bts_context *context) | ||
672 | { | ||
673 | INIT_WORK(&context->work, free_bts_context_work); | ||
674 | schedule_work(&context->work); | ||
675 | } | ||
676 | |||
677 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
678 | { | ||
679 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
680 | if (context) { | ||
681 | context->task = task; | ||
682 | task->bts = context; | ||
683 | |||
684 | get_task_struct(task); | ||
685 | } | ||
686 | |||
687 | return context; | ||
688 | } | ||
689 | |||
581 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 690 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
582 | struct bts_struct __user *out) | 691 | struct bts_struct __user *out) |
583 | { | 692 | { |
693 | struct bts_context *context; | ||
584 | const struct bts_trace *trace; | 694 | const struct bts_trace *trace; |
585 | struct bts_struct bts; | 695 | struct bts_struct bts; |
586 | const unsigned char *at; | 696 | const unsigned char *at; |
587 | int error; | 697 | int error; |
588 | 698 | ||
589 | trace = ds_read_bts(child->bts); | 699 | context = child->bts; |
700 | if (!context) | ||
701 | return -ESRCH; | ||
702 | |||
703 | trace = ds_read_bts(context->tracer); | ||
590 | if (!trace) | 704 | if (!trace) |
591 | return -EPERM; | 705 | return -ESRCH; |
592 | 706 | ||
593 | at = trace->ds.top - ((index + 1) * trace->ds.size); | 707 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
594 | if ((void *)at < trace->ds.begin) | 708 | if ((void *)at < trace->ds.begin) |
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, | |||
597 | if (!trace->read) | 711 | if (!trace->read) |
598 | return -EOPNOTSUPP; | 712 | return -EOPNOTSUPP; |
599 | 713 | ||
600 | error = trace->read(child->bts, at, &bts); | 714 | error = trace->read(context->tracer, at, &bts); |
601 | if (error < 0) | 715 | if (error < 0) |
602 | return error; | 716 | return error; |
603 | 717 | ||
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
611 | long size, | 725 | long size, |
612 | struct bts_struct __user *out) | 726 | struct bts_struct __user *out) |
613 | { | 727 | { |
728 | struct bts_context *context; | ||
614 | const struct bts_trace *trace; | 729 | const struct bts_trace *trace; |
615 | const unsigned char *at; | 730 | const unsigned char *at; |
616 | int error, drained = 0; | 731 | int error, drained = 0; |
617 | 732 | ||
618 | trace = ds_read_bts(child->bts); | 733 | context = child->bts; |
734 | if (!context) | ||
735 | return -ESRCH; | ||
736 | |||
737 | trace = ds_read_bts(context->tracer); | ||
619 | if (!trace) | 738 | if (!trace) |
620 | return -EPERM; | 739 | return -ESRCH; |
621 | 740 | ||
622 | if (!trace->read) | 741 | if (!trace->read) |
623 | return -EOPNOTSUPP; | 742 | return -EOPNOTSUPP; |
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
628 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | 747 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
629 | out++, drained++, at += trace->ds.size) { | 748 | out++, drained++, at += trace->ds.size) { |
630 | struct bts_struct bts; | 749 | struct bts_struct bts; |
631 | int error; | ||
632 | 750 | ||
633 | error = trace->read(child->bts, at, &bts); | 751 | error = trace->read(context->tracer, at, &bts); |
634 | if (error < 0) | 752 | if (error < 0) |
635 | return error; | 753 | return error; |
636 | 754 | ||
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
640 | 758 | ||
641 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 759 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
642 | 760 | ||
643 | error = ds_reset_bts(child->bts); | 761 | error = ds_reset_bts(context->tracer); |
644 | if (error < 0) | 762 | if (error < 0) |
645 | return error; | 763 | return error; |
646 | 764 | ||
647 | return drained; | 765 | return drained; |
648 | } | 766 | } |
649 | 767 | ||
650 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) | ||
651 | { | ||
652 | child->bts_buffer = alloc_locked_buffer(size); | ||
653 | if (!child->bts_buffer) | ||
654 | return -ENOMEM; | ||
655 | |||
656 | child->bts_size = size; | ||
657 | |||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | static void ptrace_bts_free_buffer(struct task_struct *child) | ||
662 | { | ||
663 | free_locked_buffer(child->bts_buffer, child->bts_size); | ||
664 | child->bts_buffer = NULL; | ||
665 | child->bts_size = 0; | ||
666 | } | ||
667 | |||
668 | static int ptrace_bts_config(struct task_struct *child, | 768 | static int ptrace_bts_config(struct task_struct *child, |
669 | long cfg_size, | 769 | long cfg_size, |
670 | const struct ptrace_bts_config __user *ucfg) | 770 | const struct ptrace_bts_config __user *ucfg) |
671 | { | 771 | { |
772 | struct bts_context *context; | ||
672 | struct ptrace_bts_config cfg; | 773 | struct ptrace_bts_config cfg; |
673 | unsigned int flags = 0; | 774 | unsigned int flags = 0; |
674 | 775 | ||
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child, | |||
678 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 779 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
679 | return -EFAULT; | 780 | return -EFAULT; |
680 | 781 | ||
681 | if (child->bts) { | 782 | context = child->bts; |
682 | ds_release_bts(child->bts); | 783 | if (!context) |
683 | child->bts = NULL; | 784 | context = alloc_bts_context(child); |
684 | } | 785 | if (!context) |
786 | return -ENOMEM; | ||
685 | 787 | ||
686 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 788 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
687 | if (!cfg.signal) | 789 | if (!cfg.signal) |
688 | return -EINVAL; | 790 | return -EINVAL; |
689 | 791 | ||
690 | child->thread.bts_ovfl_signal = cfg.signal; | ||
691 | return -EOPNOTSUPP; | 792 | return -EOPNOTSUPP; |
793 | context->bts_ovfl_signal = cfg.signal; | ||
692 | } | 794 | } |
693 | 795 | ||
694 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && | 796 | ds_release_bts(context->tracer); |
695 | (cfg.size != child->bts_size)) { | 797 | context->tracer = NULL; |
696 | int error; | ||
697 | 798 | ||
698 | ptrace_bts_free_buffer(child); | 799 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { |
800 | int err; | ||
699 | 801 | ||
700 | error = ptrace_bts_allocate_buffer(child, cfg.size); | 802 | free_bts_buffer(context); |
701 | if (error < 0) | 803 | if (!cfg.size) |
702 | return error; | 804 | return 0; |
805 | |||
806 | err = alloc_bts_buffer(context, cfg.size); | ||
807 | if (err < 0) | ||
808 | return err; | ||
703 | } | 809 | } |
704 | 810 | ||
705 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 811 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child, | |||
708 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 814 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
709 | flags |= BTS_TIMESTAMPS; | 815 | flags |= BTS_TIMESTAMPS; |
710 | 816 | ||
711 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, | 817 | context->tracer = |
712 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | 818 | ds_request_bts_task(child, context->buffer, context->size, |
713 | flags); | 819 | NULL, (size_t)-1, flags); |
714 | if (IS_ERR(child->bts)) { | 820 | if (unlikely(IS_ERR(context->tracer))) { |
715 | int error = PTR_ERR(child->bts); | 821 | int error = PTR_ERR(context->tracer); |
716 | |||
717 | ptrace_bts_free_buffer(child); | ||
718 | child->bts = NULL; | ||
719 | 822 | ||
823 | free_bts_buffer(context); | ||
824 | context->tracer = NULL; | ||
720 | return error; | 825 | return error; |
721 | } | 826 | } |
722 | 827 | ||
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child, | |||
727 | long cfg_size, | 832 | long cfg_size, |
728 | struct ptrace_bts_config __user *ucfg) | 833 | struct ptrace_bts_config __user *ucfg) |
729 | { | 834 | { |
835 | struct bts_context *context; | ||
730 | const struct bts_trace *trace; | 836 | const struct bts_trace *trace; |
731 | struct ptrace_bts_config cfg; | 837 | struct ptrace_bts_config cfg; |
732 | 838 | ||
839 | context = child->bts; | ||
840 | if (!context) | ||
841 | return -ESRCH; | ||
842 | |||
733 | if (cfg_size < sizeof(cfg)) | 843 | if (cfg_size < sizeof(cfg)) |
734 | return -EIO; | 844 | return -EIO; |
735 | 845 | ||
736 | trace = ds_read_bts(child->bts); | 846 | trace = ds_read_bts(context->tracer); |
737 | if (!trace) | 847 | if (!trace) |
738 | return -EPERM; | 848 | return -ESRCH; |
739 | 849 | ||
740 | memset(&cfg, 0, sizeof(cfg)); | 850 | memset(&cfg, 0, sizeof(cfg)); |
741 | cfg.size = trace->ds.end - trace->ds.begin; | 851 | cfg.size = trace->ds.end - trace->ds.begin; |
742 | cfg.signal = child->thread.bts_ovfl_signal; | 852 | cfg.signal = context->bts_ovfl_signal; |
743 | cfg.bts_size = sizeof(struct bts_struct); | 853 | cfg.bts_size = sizeof(struct bts_struct); |
744 | 854 | ||
745 | if (cfg.signal) | 855 | if (cfg.signal) |
746 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 856 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child, | |||
759 | 869 | ||
760 | static int ptrace_bts_clear(struct task_struct *child) | 870 | static int ptrace_bts_clear(struct task_struct *child) |
761 | { | 871 | { |
872 | struct bts_context *context; | ||
762 | const struct bts_trace *trace; | 873 | const struct bts_trace *trace; |
763 | 874 | ||
764 | trace = ds_read_bts(child->bts); | 875 | context = child->bts; |
876 | if (!context) | ||
877 | return -ESRCH; | ||
878 | |||
879 | trace = ds_read_bts(context->tracer); | ||
765 | if (!trace) | 880 | if (!trace) |
766 | return -EPERM; | 881 | return -ESRCH; |
767 | 882 | ||
768 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 883 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
769 | 884 | ||
770 | return ds_reset_bts(child->bts); | 885 | return ds_reset_bts(context->tracer); |
771 | } | 886 | } |
772 | 887 | ||
773 | static int ptrace_bts_size(struct task_struct *child) | 888 | static int ptrace_bts_size(struct task_struct *child) |
774 | { | 889 | { |
890 | struct bts_context *context; | ||
775 | const struct bts_trace *trace; | 891 | const struct bts_trace *trace; |
776 | 892 | ||
777 | trace = ds_read_bts(child->bts); | 893 | context = child->bts; |
894 | if (!context) | ||
895 | return -ESRCH; | ||
896 | |||
897 | trace = ds_read_bts(context->tracer); | ||
778 | if (!trace) | 898 | if (!trace) |
779 | return -EPERM; | 899 | return -ESRCH; |
780 | 900 | ||
781 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | 901 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
782 | } | 902 | } |
783 | 903 | ||
784 | static void ptrace_bts_fork(struct task_struct *tsk) | 904 | /* |
785 | { | 905 | * Called from __ptrace_unlink() after the child has been moved back |
786 | tsk->bts = NULL; | 906 | * to its original parent. |
787 | tsk->bts_buffer = NULL; | 907 | */ |
788 | tsk->bts_size = 0; | 908 | void ptrace_bts_untrace(struct task_struct *child) |
789 | tsk->thread.bts_ovfl_signal = 0; | ||
790 | } | ||
791 | |||
792 | static void ptrace_bts_untrace(struct task_struct *child) | ||
793 | { | 909 | { |
794 | if (unlikely(child->bts)) { | 910 | if (unlikely(child->bts)) { |
795 | ds_release_bts(child->bts); | 911 | free_bts_context(child->bts); |
796 | child->bts = NULL; | 912 | child->bts = NULL; |
797 | |||
798 | /* We cannot update total_vm and locked_vm since | ||
799 | child's mm is already gone. But we can reclaim the | ||
800 | memory. */ | ||
801 | kfree(child->bts_buffer); | ||
802 | child->bts_buffer = NULL; | ||
803 | child->bts_size = 0; | ||
804 | } | 913 | } |
805 | } | 914 | } |
806 | |||
807 | static void ptrace_bts_detach(struct task_struct *child) | ||
808 | { | ||
809 | /* | ||
810 | * Ptrace_detach() races with ptrace_untrace() in case | ||
811 | * the child dies and is reaped by another thread. | ||
812 | * | ||
813 | * We only do the memory accounting at this point and | ||
814 | * leave the buffer deallocation and the bts tracer | ||
815 | * release to ptrace_bts_untrace() which will be called | ||
816 | * later on with tasklist_lock held. | ||
817 | */ | ||
818 | release_locked_buffer(child->bts_buffer, child->bts_size); | ||
819 | } | ||
820 | #else | ||
821 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} | ||
822 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
823 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
824 | #endif /* CONFIG_X86_PTRACE_BTS */ | 915 | #endif /* CONFIG_X86_PTRACE_BTS */ |
825 | 916 | ||
826 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) | ||
827 | { | ||
828 | ptrace_bts_fork(child); | ||
829 | } | ||
830 | |||
831 | void x86_ptrace_untrace(struct task_struct *child) | ||
832 | { | ||
833 | ptrace_bts_untrace(child); | ||
834 | } | ||
835 | |||
836 | /* | 917 | /* |
837 | * Called by kernel/ptrace.c when detaching.. | 918 | * Called by kernel/ptrace.c when detaching.. |
838 | * | 919 | * |
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child) | |||
844 | #ifdef TIF_SYSCALL_EMU | 925 | #ifdef TIF_SYSCALL_EMU |
845 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 926 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
846 | #endif | 927 | #endif |
847 | ptrace_bts_detach(child); | ||
848 | } | 928 | } |
849 | 929 | ||
850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 930 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
diff --git a/include/linux/mm.h b/include/linux/mm.h index bff1f0d475c7..009eabd3c21c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -19,6 +19,7 @@ struct anon_vma; | |||
19 | struct file_ra_state; | 19 | struct file_ra_state; |
20 | struct user_struct; | 20 | struct user_struct; |
21 | struct writeback_control; | 21 | struct writeback_control; |
22 | struct rlimit; | ||
22 | 23 | ||
23 | #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ | 24 | #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ |
24 | extern unsigned long max_mapnr; | 25 | extern unsigned long max_mapnr; |
@@ -1319,8 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page, | |||
1319 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); | 1320 | int vmemmap_populate(struct page *start_page, unsigned long pages, int node); |
1320 | void vmemmap_populate_print_last(void); | 1321 | void vmemmap_populate_print_last(void); |
1321 | 1322 | ||
1322 | extern void *alloc_locked_buffer(size_t size); | 1323 | extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, |
1323 | extern void free_locked_buffer(void *buffer, size_t size); | 1324 | size_t size); |
1324 | extern void release_locked_buffer(void *buffer, size_t size); | 1325 | extern void refund_locked_memory(struct mm_struct *mm, size_t size); |
1325 | #endif /* __KERNEL__ */ | 1326 | #endif /* __KERNEL__ */ |
1326 | #endif /* _LINUX_MM_H */ | 1327 | #endif /* _LINUX_MM_H */ |
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 67c15653fc23..59e133d39d50 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h | |||
@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, | |||
95 | struct task_struct *new_parent); | 95 | struct task_struct *new_parent); |
96 | extern void __ptrace_unlink(struct task_struct *child); | 96 | extern void __ptrace_unlink(struct task_struct *child); |
97 | extern void exit_ptrace(struct task_struct *tracer); | 97 | extern void exit_ptrace(struct task_struct *tracer); |
98 | extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags); | ||
99 | #define PTRACE_MODE_READ 1 | 98 | #define PTRACE_MODE_READ 1 |
100 | #define PTRACE_MODE_ATTACH 2 | 99 | #define PTRACE_MODE_ATTACH 2 |
101 | /* Returns 0 on success, -errno on denial. */ | 100 | /* Returns 0 on success, -errno on denial. */ |
@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) | |||
327 | #define arch_ptrace_untrace(task) do { } while (0) | 326 | #define arch_ptrace_untrace(task) do { } while (0) |
328 | #endif | 327 | #endif |
329 | 328 | ||
330 | #ifndef arch_ptrace_fork | ||
331 | /* | ||
332 | * Do machine-specific work to initialize a new task. | ||
333 | * | ||
334 | * This is called from copy_process(). | ||
335 | */ | ||
336 | #define arch_ptrace_fork(child, clone_flags) do { } while (0) | ||
337 | #endif | ||
338 | |||
339 | extern int task_current_syscall(struct task_struct *target, long *callno, | 329 | extern int task_current_syscall(struct task_struct *target, long *callno, |
340 | unsigned long args[6], unsigned int maxargs, | 330 | unsigned long args[6], unsigned int maxargs, |
341 | unsigned long *sp, unsigned long *pc); | 331 | unsigned long *sp, unsigned long *pc); |
diff --git a/include/linux/sched.h b/include/linux/sched.h index 7ede5e490913..1ed4ef520680 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -96,8 +96,8 @@ struct exec_domain; | |||
96 | struct futex_pi_state; | 96 | struct futex_pi_state; |
97 | struct robust_list_head; | 97 | struct robust_list_head; |
98 | struct bio; | 98 | struct bio; |
99 | struct bts_tracer; | ||
100 | struct fs_struct; | 99 | struct fs_struct; |
100 | struct bts_context; | ||
101 | 101 | ||
102 | /* | 102 | /* |
103 | * List of flags we want to share for kernel threads, | 103 | * List of flags we want to share for kernel threads, |
@@ -1209,18 +1209,11 @@ struct task_struct { | |||
1209 | struct list_head ptraced; | 1209 | struct list_head ptraced; |
1210 | struct list_head ptrace_entry; | 1210 | struct list_head ptrace_entry; |
1211 | 1211 | ||
1212 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1213 | /* | 1212 | /* |
1214 | * This is the tracer handle for the ptrace BTS extension. | 1213 | * This is the tracer handle for the ptrace BTS extension. |
1215 | * This field actually belongs to the ptracer task. | 1214 | * This field actually belongs to the ptracer task. |
1216 | */ | 1215 | */ |
1217 | struct bts_tracer *bts; | 1216 | struct bts_context *bts; |
1218 | /* | ||
1219 | * The buffer to hold the BTS data. | ||
1220 | */ | ||
1221 | void *bts_buffer; | ||
1222 | size_t bts_size; | ||
1223 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1224 | 1217 | ||
1225 | /* PID/PID hash table linkage. */ | 1218 | /* PID/PID hash table linkage. */ |
1226 | struct pid_link pids[PIDTYPE_MAX]; | 1219 | struct pid_link pids[PIDTYPE_MAX]; |
@@ -2003,8 +1996,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); | |||
2003 | extern char *get_task_comm(char *to, struct task_struct *tsk); | 1996 | extern char *get_task_comm(char *to, struct task_struct *tsk); |
2004 | 1997 | ||
2005 | #ifdef CONFIG_SMP | 1998 | #ifdef CONFIG_SMP |
1999 | extern void wait_task_context_switch(struct task_struct *p); | ||
2006 | extern unsigned long wait_task_inactive(struct task_struct *, long match_state); | 2000 | extern unsigned long wait_task_inactive(struct task_struct *, long match_state); |
2007 | #else | 2001 | #else |
2002 | static inline void wait_task_context_switch(struct task_struct *p) {} | ||
2008 | static inline unsigned long wait_task_inactive(struct task_struct *p, | 2003 | static inline unsigned long wait_task_inactive(struct task_struct *p, |
2009 | long match_state) | 2004 | long match_state) |
2010 | { | 2005 | { |
diff --git a/kernel/Makefile b/kernel/Makefile index 42423665660a..a35eee3436de 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o | |||
93 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | 93 | obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o |
94 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 94 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
95 | obj-$(CONFIG_TRACING) += trace/ | 95 | obj-$(CONFIG_TRACING) += trace/ |
96 | obj-$(CONFIG_X86_DS) += trace/ | ||
96 | obj-$(CONFIG_SMP) += sched_cpupri.o | 97 | obj-$(CONFIG_SMP) += sched_cpupri.o |
97 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | 98 | obj-$(CONFIG_SLOW_WORK) += slow-work.o |
98 | 99 | ||
diff --git a/kernel/fork.c b/kernel/fork.c index 085f73ebcea6..711468f3db2a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -1088,8 +1088,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1088 | #ifdef CONFIG_DEBUG_MUTEXES | 1088 | #ifdef CONFIG_DEBUG_MUTEXES |
1089 | p->blocked_on = NULL; /* not blocked yet */ | 1089 | p->blocked_on = NULL; /* not blocked yet */ |
1090 | #endif | 1090 | #endif |
1091 | if (unlikely(current->ptrace)) | 1091 | |
1092 | ptrace_fork(p, clone_flags); | 1092 | p->bts = NULL; |
1093 | 1093 | ||
1094 | /* Perform scheduler related setup. Assign this task to a CPU. */ | 1094 | /* Perform scheduler related setup. Assign this task to a CPU. */ |
1095 | sched_fork(p, clone_flags); | 1095 | sched_fork(p, clone_flags); |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0692ab5a0d67..e950805f8630 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -25,16 +25,6 @@ | |||
25 | 25 | ||
26 | 26 | ||
27 | /* | 27 | /* |
28 | * Initialize a new task whose father had been ptraced. | ||
29 | * | ||
30 | * Called from copy_process(). | ||
31 | */ | ||
32 | void ptrace_fork(struct task_struct *child, unsigned long clone_flags) | ||
33 | { | ||
34 | arch_ptrace_fork(child, clone_flags); | ||
35 | } | ||
36 | |||
37 | /* | ||
38 | * ptrace a task: make the debugger its new parent and | 28 | * ptrace a task: make the debugger its new parent and |
39 | * move it to the ptrace list. | 29 | * move it to the ptrace list. |
40 | * | 30 | * |
diff --git a/kernel/sched.c b/kernel/sched.c index 14a19b17674e..6530a27052f3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2011,6 +2011,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2011 | } | 2011 | } |
2012 | 2012 | ||
2013 | /* | 2013 | /* |
2014 | * wait_task_context_switch - wait for a thread to complete at least one | ||
2015 | * context switch. | ||
2016 | * | ||
2017 | * @p must not be current. | ||
2018 | */ | ||
2019 | void wait_task_context_switch(struct task_struct *p) | ||
2020 | { | ||
2021 | unsigned long nvcsw, nivcsw, flags; | ||
2022 | int running; | ||
2023 | struct rq *rq; | ||
2024 | |||
2025 | nvcsw = p->nvcsw; | ||
2026 | nivcsw = p->nivcsw; | ||
2027 | for (;;) { | ||
2028 | /* | ||
2029 | * The runqueue is assigned before the actual context | ||
2030 | * switch. We need to take the runqueue lock. | ||
2031 | * | ||
2032 | * We could check initially without the lock but it is | ||
2033 | * very likely that we need to take the lock in every | ||
2034 | * iteration. | ||
2035 | */ | ||
2036 | rq = task_rq_lock(p, &flags); | ||
2037 | running = task_running(rq, p); | ||
2038 | task_rq_unlock(rq, &flags); | ||
2039 | |||
2040 | if (likely(!running)) | ||
2041 | break; | ||
2042 | /* | ||
2043 | * The switch count is incremented before the actual | ||
2044 | * context switch. We thus wait for two switches to be | ||
2045 | * sure at least one completed. | ||
2046 | */ | ||
2047 | if ((p->nvcsw - nvcsw) > 1) | ||
2048 | break; | ||
2049 | if ((p->nivcsw - nivcsw) > 1) | ||
2050 | break; | ||
2051 | |||
2052 | cpu_relax(); | ||
2053 | } | ||
2054 | } | ||
2055 | |||
2056 | /* | ||
2014 | * wait_task_inactive - wait for a thread to unschedule. | 2057 | * wait_task_inactive - wait for a thread to unschedule. |
2015 | * | 2058 | * |
2016 | * If @match_state is nonzero, it's the @p->state value just checked and | 2059 | * If @match_state is nonzero, it's the @p->state value just checked and |
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 7c34cbfff96e..06b85850fab4 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile | |||
@@ -15,12 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES | |||
15 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING | 15 | KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING |
16 | endif | 16 | endif |
17 | 17 | ||
18 | # | ||
19 | # Make the trace clocks available generally: it's infrastructure | ||
20 | # relied on by ptrace for example: | ||
21 | # | ||
22 | obj-y += trace_clock.o | ||
23 | |||
18 | obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o | 24 | obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o |
19 | obj-$(CONFIG_RING_BUFFER) += ring_buffer.o | 25 | obj-$(CONFIG_RING_BUFFER) += ring_buffer.o |
20 | obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o | 26 | obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o |
21 | 27 | ||
22 | obj-$(CONFIG_TRACING) += trace.o | 28 | obj-$(CONFIG_TRACING) += trace.o |
23 | obj-$(CONFIG_TRACING) += trace_clock.o | ||
24 | obj-$(CONFIG_TRACING) += trace_output.o | 29 | obj-$(CONFIG_TRACING) += trace_output.o |
25 | obj-$(CONFIG_TRACING) += trace_stat.o | 30 | obj-$(CONFIG_TRACING) += trace_stat.o |
26 | obj-$(CONFIG_TRACING) += trace_printk.o | 31 | obj-$(CONFIG_TRACING) += trace_printk.o |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ba25793ffe67..6e735d4771f8 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
@@ -538,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, | |||
538 | struct trace_array *tr); | 538 | struct trace_array *tr); |
539 | extern int trace_selftest_startup_branch(struct tracer *trace, | 539 | extern int trace_selftest_startup_branch(struct tracer *trace, |
540 | struct trace_array *tr); | 540 | struct trace_array *tr); |
541 | extern int trace_selftest_startup_hw_branches(struct tracer *trace, | ||
542 | struct trace_array *tr); | ||
541 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 543 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
542 | 544 | ||
543 | extern void *head_page(struct trace_array_cpu *data); | 545 | extern void *head_page(struct trace_array_cpu *data); |
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c index 8683d50a753a..ca7d7c4d0c2a 100644 --- a/kernel/trace/trace_hw_branches.c +++ b/kernel/trace/trace_hw_branches.c | |||
@@ -1,10 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * h/w branch tracer for x86 based on bts | 2 | * h/w branch tracer for x86 based on BTS |
3 | * | 3 | * |
4 | * Copyright (C) 2008-2009 Intel Corporation. | 4 | * Copyright (C) 2008-2009 Intel Corporation. |
5 | * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 | 5 | * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 |
6 | */ | 6 | */ |
7 | #include <linux/spinlock.h> | ||
8 | #include <linux/kallsyms.h> | 7 | #include <linux/kallsyms.h> |
9 | #include <linux/debugfs.h> | 8 | #include <linux/debugfs.h> |
10 | #include <linux/ftrace.h> | 9 | #include <linux/ftrace.h> |
@@ -15,110 +14,119 @@ | |||
15 | 14 | ||
16 | #include <asm/ds.h> | 15 | #include <asm/ds.h> |
17 | 16 | ||
18 | #include "trace.h" | ||
19 | #include "trace_output.h" | 17 | #include "trace_output.h" |
18 | #include "trace.h" | ||
20 | 19 | ||
21 | 20 | ||
22 | #define SIZEOF_BTS (1 << 13) | 21 | #define BTS_BUFFER_SIZE (1 << 13) |
23 | 22 | ||
24 | /* | ||
25 | * The tracer lock protects the below per-cpu tracer array. | ||
26 | * It needs to be held to: | ||
27 | * - start tracing on all cpus | ||
28 | * - stop tracing on all cpus | ||
29 | * - start tracing on a single hotplug cpu | ||
30 | * - stop tracing on a single hotplug cpu | ||
31 | * - read the trace from all cpus | ||
32 | * - read the trace from a single cpu | ||
33 | */ | ||
34 | static DEFINE_SPINLOCK(bts_tracer_lock); | ||
35 | static DEFINE_PER_CPU(struct bts_tracer *, tracer); | 23 | static DEFINE_PER_CPU(struct bts_tracer *, tracer); |
36 | static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); | 24 | static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer); |
37 | 25 | ||
38 | #define this_tracer per_cpu(tracer, smp_processor_id()) | 26 | #define this_tracer per_cpu(tracer, smp_processor_id()) |
39 | #define this_buffer per_cpu(buffer, smp_processor_id()) | ||
40 | 27 | ||
41 | static int __read_mostly trace_hw_branches_enabled; | 28 | static int trace_hw_branches_enabled __read_mostly; |
29 | static int trace_hw_branches_suspended __read_mostly; | ||
42 | static struct trace_array *hw_branch_trace __read_mostly; | 30 | static struct trace_array *hw_branch_trace __read_mostly; |
43 | 31 | ||
44 | 32 | ||
45 | /* | 33 | static void bts_trace_init_cpu(int cpu) |
46 | * Start tracing on the current cpu. | ||
47 | * The argument is ignored. | ||
48 | * | ||
49 | * pre: bts_tracer_lock must be locked. | ||
50 | */ | ||
51 | static void bts_trace_start_cpu(void *arg) | ||
52 | { | 34 | { |
53 | if (this_tracer) | 35 | per_cpu(tracer, cpu) = |
54 | ds_release_bts(this_tracer); | 36 | ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE, |
55 | 37 | NULL, (size_t)-1, BTS_KERNEL); | |
56 | this_tracer = | 38 | |
57 | ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, | 39 | if (IS_ERR(per_cpu(tracer, cpu))) |
58 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | 40 | per_cpu(tracer, cpu) = NULL; |
59 | BTS_KERNEL); | ||
60 | if (IS_ERR(this_tracer)) { | ||
61 | this_tracer = NULL; | ||
62 | return; | ||
63 | } | ||
64 | } | 41 | } |
65 | 42 | ||
66 | static void bts_trace_start(struct trace_array *tr) | 43 | static int bts_trace_init(struct trace_array *tr) |
67 | { | 44 | { |
68 | spin_lock(&bts_tracer_lock); | 45 | int cpu; |
46 | |||
47 | hw_branch_trace = tr; | ||
48 | trace_hw_branches_enabled = 0; | ||
69 | 49 | ||
70 | on_each_cpu(bts_trace_start_cpu, NULL, 1); | 50 | get_online_cpus(); |
71 | trace_hw_branches_enabled = 1; | 51 | for_each_online_cpu(cpu) { |
52 | bts_trace_init_cpu(cpu); | ||
72 | 53 | ||
73 | spin_unlock(&bts_tracer_lock); | 54 | if (likely(per_cpu(tracer, cpu))) |
55 | trace_hw_branches_enabled = 1; | ||
56 | } | ||
57 | trace_hw_branches_suspended = 0; | ||
58 | put_online_cpus(); | ||
59 | |||
60 | /* If we could not enable tracing on a single cpu, we fail. */ | ||
61 | return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP; | ||
74 | } | 62 | } |
75 | 63 | ||
76 | /* | 64 | static void bts_trace_reset(struct trace_array *tr) |
77 | * Stop tracing on the current cpu. | ||
78 | * The argument is ignored. | ||
79 | * | ||
80 | * pre: bts_tracer_lock must be locked. | ||
81 | */ | ||
82 | static void bts_trace_stop_cpu(void *arg) | ||
83 | { | 65 | { |
84 | if (this_tracer) { | 66 | int cpu; |
85 | ds_release_bts(this_tracer); | 67 | |
86 | this_tracer = NULL; | 68 | get_online_cpus(); |
69 | for_each_online_cpu(cpu) { | ||
70 | if (likely(per_cpu(tracer, cpu))) { | ||
71 | ds_release_bts(per_cpu(tracer, cpu)); | ||
72 | per_cpu(tracer, cpu) = NULL; | ||
73 | } | ||
87 | } | 74 | } |
75 | trace_hw_branches_enabled = 0; | ||
76 | trace_hw_branches_suspended = 0; | ||
77 | put_online_cpus(); | ||
88 | } | 78 | } |
89 | 79 | ||
90 | static void bts_trace_stop(struct trace_array *tr) | 80 | static void bts_trace_start(struct trace_array *tr) |
91 | { | 81 | { |
92 | spin_lock(&bts_tracer_lock); | 82 | int cpu; |
93 | 83 | ||
94 | trace_hw_branches_enabled = 0; | 84 | get_online_cpus(); |
95 | on_each_cpu(bts_trace_stop_cpu, NULL, 1); | 85 | for_each_online_cpu(cpu) |
86 | if (likely(per_cpu(tracer, cpu))) | ||
87 | ds_resume_bts(per_cpu(tracer, cpu)); | ||
88 | trace_hw_branches_suspended = 0; | ||
89 | put_online_cpus(); | ||
90 | } | ||
96 | 91 | ||
97 | spin_unlock(&bts_tracer_lock); | 92 | static void bts_trace_stop(struct trace_array *tr) |
93 | { | ||
94 | int cpu; | ||
95 | |||
96 | get_online_cpus(); | ||
97 | for_each_online_cpu(cpu) | ||
98 | if (likely(per_cpu(tracer, cpu))) | ||
99 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
100 | trace_hw_branches_suspended = 1; | ||
101 | put_online_cpus(); | ||
98 | } | 102 | } |
99 | 103 | ||
100 | static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, | 104 | static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, |
101 | unsigned long action, void *hcpu) | 105 | unsigned long action, void *hcpu) |
102 | { | 106 | { |
103 | unsigned int cpu = (unsigned long)hcpu; | 107 | int cpu = (long)hcpu; |
104 | |||
105 | spin_lock(&bts_tracer_lock); | ||
106 | |||
107 | if (!trace_hw_branches_enabled) | ||
108 | goto out; | ||
109 | 108 | ||
110 | switch (action) { | 109 | switch (action) { |
111 | case CPU_ONLINE: | 110 | case CPU_ONLINE: |
112 | case CPU_DOWN_FAILED: | 111 | case CPU_DOWN_FAILED: |
113 | smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); | 112 | /* The notification is sent with interrupts enabled. */ |
113 | if (trace_hw_branches_enabled) { | ||
114 | bts_trace_init_cpu(cpu); | ||
115 | |||
116 | if (trace_hw_branches_suspended && | ||
117 | likely(per_cpu(tracer, cpu))) | ||
118 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
119 | } | ||
114 | break; | 120 | break; |
121 | |||
115 | case CPU_DOWN_PREPARE: | 122 | case CPU_DOWN_PREPARE: |
116 | smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); | 123 | /* The notification is sent with interrupts enabled. */ |
117 | break; | 124 | if (likely(per_cpu(tracer, cpu))) { |
125 | ds_release_bts(per_cpu(tracer, cpu)); | ||
126 | per_cpu(tracer, cpu) = NULL; | ||
127 | } | ||
118 | } | 128 | } |
119 | 129 | ||
120 | out: | ||
121 | spin_unlock(&bts_tracer_lock); | ||
122 | return NOTIFY_DONE; | 130 | return NOTIFY_DONE; |
123 | } | 131 | } |
124 | 132 | ||
@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { | |||
126 | .notifier_call = bts_hotcpu_handler | 134 | .notifier_call = bts_hotcpu_handler |
127 | }; | 135 | }; |
128 | 136 | ||
129 | static int bts_trace_init(struct trace_array *tr) | ||
130 | { | ||
131 | hw_branch_trace = tr; | ||
132 | |||
133 | bts_trace_start(tr); | ||
134 | |||
135 | return 0; | ||
136 | } | ||
137 | |||
138 | static void bts_trace_reset(struct trace_array *tr) | ||
139 | { | ||
140 | bts_trace_stop(tr); | ||
141 | } | ||
142 | |||
143 | static void bts_trace_print_header(struct seq_file *m) | 137 | static void bts_trace_print_header(struct seq_file *m) |
144 | { | 138 | { |
145 | seq_puts(m, "# CPU# TO <- FROM\n"); | 139 | seq_puts(m, "# CPU# TO <- FROM\n"); |
@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m) | |||
147 | 141 | ||
148 | static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) | 142 | static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) |
149 | { | 143 | { |
144 | unsigned long symflags = TRACE_ITER_SYM_OFFSET; | ||
150 | struct trace_entry *entry = iter->ent; | 145 | struct trace_entry *entry = iter->ent; |
151 | struct trace_seq *seq = &iter->seq; | 146 | struct trace_seq *seq = &iter->seq; |
152 | struct hw_branch_entry *it; | 147 | struct hw_branch_entry *it; |
153 | unsigned long symflags = TRACE_ITER_SYM_OFFSET; | ||
154 | 148 | ||
155 | trace_assign_type(it, entry); | 149 | trace_assign_type(it, entry); |
156 | 150 | ||
@@ -226,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) | |||
226 | /* | 220 | /* |
227 | * Collect the trace on the current cpu and write it into the ftrace buffer. | 221 | * Collect the trace on the current cpu and write it into the ftrace buffer. |
228 | * | 222 | * |
229 | * pre: bts_tracer_lock must be locked | 223 | * pre: tracing must be suspended on the current cpu |
230 | */ | 224 | */ |
231 | static void trace_bts_cpu(void *arg) | 225 | static void trace_bts_cpu(void *arg) |
232 | { | 226 | { |
233 | struct trace_array *tr = (struct trace_array *) arg; | 227 | struct trace_array *tr = (struct trace_array *)arg; |
234 | const struct bts_trace *trace; | 228 | const struct bts_trace *trace; |
235 | unsigned char *at; | 229 | unsigned char *at; |
236 | 230 | ||
@@ -243,10 +237,9 @@ static void trace_bts_cpu(void *arg) | |||
243 | if (unlikely(!this_tracer)) | 237 | if (unlikely(!this_tracer)) |
244 | return; | 238 | return; |
245 | 239 | ||
246 | ds_suspend_bts(this_tracer); | ||
247 | trace = ds_read_bts(this_tracer); | 240 | trace = ds_read_bts(this_tracer); |
248 | if (!trace) | 241 | if (!trace) |
249 | goto out; | 242 | return; |
250 | 243 | ||
251 | for (at = trace->ds.top; (void *)at < trace->ds.end; | 244 | for (at = trace->ds.top; (void *)at < trace->ds.end; |
252 | at += trace->ds.size) | 245 | at += trace->ds.size) |
@@ -255,18 +248,27 @@ static void trace_bts_cpu(void *arg) | |||
255 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | 248 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
256 | at += trace->ds.size) | 249 | at += trace->ds.size) |
257 | trace_bts_at(trace, at); | 250 | trace_bts_at(trace, at); |
258 | |||
259 | out: | ||
260 | ds_resume_bts(this_tracer); | ||
261 | } | 251 | } |
262 | 252 | ||
263 | static void trace_bts_prepare(struct trace_iterator *iter) | 253 | static void trace_bts_prepare(struct trace_iterator *iter) |
264 | { | 254 | { |
265 | spin_lock(&bts_tracer_lock); | 255 | int cpu; |
266 | 256 | ||
257 | get_online_cpus(); | ||
258 | for_each_online_cpu(cpu) | ||
259 | if (likely(per_cpu(tracer, cpu))) | ||
260 | ds_suspend_bts(per_cpu(tracer, cpu)); | ||
261 | /* | ||
262 | * We need to collect the trace on the respective cpu since ftrace | ||
263 | * implicitly adds the record for the current cpu. | ||
264 | * Once that is more flexible, we could collect the data from any cpu. | ||
265 | */ | ||
267 | on_each_cpu(trace_bts_cpu, iter->tr, 1); | 266 | on_each_cpu(trace_bts_cpu, iter->tr, 1); |
268 | 267 | ||
269 | spin_unlock(&bts_tracer_lock); | 268 | for_each_online_cpu(cpu) |
269 | if (likely(per_cpu(tracer, cpu))) | ||
270 | ds_resume_bts(per_cpu(tracer, cpu)); | ||
271 | put_online_cpus(); | ||
270 | } | 272 | } |
271 | 273 | ||
272 | static void trace_bts_close(struct trace_iterator *iter) | 274 | static void trace_bts_close(struct trace_iterator *iter) |
@@ -276,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter) | |||
276 | 278 | ||
277 | void trace_hw_branch_oops(void) | 279 | void trace_hw_branch_oops(void) |
278 | { | 280 | { |
279 | spin_lock(&bts_tracer_lock); | 281 | if (this_tracer) { |
280 | 282 | ds_suspend_bts_noirq(this_tracer); | |
281 | trace_bts_cpu(hw_branch_trace); | 283 | trace_bts_cpu(hw_branch_trace); |
282 | 284 | ds_resume_bts_noirq(this_tracer); | |
283 | spin_unlock(&bts_tracer_lock); | 285 | } |
284 | } | 286 | } |
285 | 287 | ||
286 | struct tracer bts_tracer __read_mostly = | 288 | struct tracer bts_tracer __read_mostly = |
@@ -293,7 +295,10 @@ struct tracer bts_tracer __read_mostly = | |||
293 | .start = bts_trace_start, | 295 | .start = bts_trace_start, |
294 | .stop = bts_trace_stop, | 296 | .stop = bts_trace_stop, |
295 | .open = trace_bts_prepare, | 297 | .open = trace_bts_prepare, |
296 | .close = trace_bts_close | 298 | .close = trace_bts_close, |
299 | #ifdef CONFIG_FTRACE_SELFTEST | ||
300 | .selftest = trace_selftest_startup_hw_branches, | ||
301 | #endif /* CONFIG_FTRACE_SELFTEST */ | ||
297 | }; | 302 | }; |
298 | 303 | ||
299 | __init static int init_bts_trace(void) | 304 | __init static int init_bts_trace(void) |
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 08f4eb2763d1..00dd6485bdd7 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c | |||
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) | |||
16 | case TRACE_BRANCH: | 16 | case TRACE_BRANCH: |
17 | case TRACE_GRAPH_ENT: | 17 | case TRACE_GRAPH_ENT: |
18 | case TRACE_GRAPH_RET: | 18 | case TRACE_GRAPH_RET: |
19 | case TRACE_HW_BRANCHES: | ||
19 | return 1; | 20 | return 1; |
20 | } | 21 | } |
21 | return 0; | 22 | return 0; |
@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, | |||
188 | #else | 189 | #else |
189 | # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) | 190 | # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) |
190 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 191 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
192 | |||
191 | /* | 193 | /* |
192 | * Simple verification test of ftrace function tracer. | 194 | * Simple verification test of ftrace function tracer. |
193 | * Enable ftrace, sleep 1/10 second, and then read the trace | 195 | * Enable ftrace, sleep 1/10 second, and then read the trace |
@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) | |||
749 | return ret; | 751 | return ret; |
750 | } | 752 | } |
751 | #endif /* CONFIG_BRANCH_TRACER */ | 753 | #endif /* CONFIG_BRANCH_TRACER */ |
754 | |||
755 | #ifdef CONFIG_HW_BRANCH_TRACER | ||
756 | int | ||
757 | trace_selftest_startup_hw_branches(struct tracer *trace, | ||
758 | struct trace_array *tr) | ||
759 | { | ||
760 | struct trace_iterator *iter; | ||
761 | struct tracer tracer; | ||
762 | unsigned long count; | ||
763 | int ret; | ||
764 | |||
765 | if (!trace->open) { | ||
766 | printk(KERN_CONT "missing open function..."); | ||
767 | return -1; | ||
768 | } | ||
769 | |||
770 | ret = tracer_init(trace, tr); | ||
771 | if (ret) { | ||
772 | warn_failed_init_tracer(trace, ret); | ||
773 | return ret; | ||
774 | } | ||
775 | |||
776 | /* | ||
777 | * The hw-branch tracer needs to collect the trace from the various | ||
778 | * cpu trace buffers - before tracing is stopped. | ||
779 | */ | ||
780 | iter = kzalloc(sizeof(*iter), GFP_KERNEL); | ||
781 | if (!iter) | ||
782 | return -ENOMEM; | ||
783 | |||
784 | memcpy(&tracer, trace, sizeof(tracer)); | ||
785 | |||
786 | iter->trace = &tracer; | ||
787 | iter->tr = tr; | ||
788 | iter->pos = -1; | ||
789 | mutex_init(&iter->mutex); | ||
790 | |||
791 | trace->open(iter); | ||
792 | |||
793 | mutex_destroy(&iter->mutex); | ||
794 | kfree(iter); | ||
795 | |||
796 | tracing_stop(); | ||
797 | |||
798 | ret = trace_test_buffer(tr, &count); | ||
799 | trace->reset(tr); | ||
800 | tracing_start(); | ||
801 | |||
802 | if (!ret && !count) { | ||
803 | printk(KERN_CONT "no entries found.."); | ||
804 | ret = -1; | ||
805 | } | ||
806 | |||
807 | return ret; | ||
808 | } | ||
809 | #endif /* CONFIG_HW_BRANCH_TRACER */ | ||
diff --git a/mm/mlock.c b/mm/mlock.c index cbe9e0581b75..ac130433c7d3 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user) | |||
629 | free_uid(user); | 629 | free_uid(user); |
630 | } | 630 | } |
631 | 631 | ||
632 | void *alloc_locked_buffer(size_t size) | 632 | int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, |
633 | size_t size) | ||
633 | { | 634 | { |
634 | unsigned long rlim, vm, pgsz; | 635 | unsigned long lim, vm, pgsz; |
635 | void *buffer = NULL; | 636 | int error = -ENOMEM; |
636 | 637 | ||
637 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 638 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; |
638 | 639 | ||
639 | down_write(&current->mm->mmap_sem); | 640 | down_write(&mm->mmap_sem); |
640 | |||
641 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
642 | vm = current->mm->total_vm + pgsz; | ||
643 | if (rlim < vm) | ||
644 | goto out; | ||
645 | 641 | ||
646 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 642 | lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; |
647 | vm = current->mm->locked_vm + pgsz; | 643 | vm = mm->total_vm + pgsz; |
648 | if (rlim < vm) | 644 | if (lim < vm) |
649 | goto out; | 645 | goto out; |
650 | 646 | ||
651 | buffer = kzalloc(size, GFP_KERNEL); | 647 | lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; |
652 | if (!buffer) | 648 | vm = mm->locked_vm + pgsz; |
649 | if (lim < vm) | ||
653 | goto out; | 650 | goto out; |
654 | 651 | ||
655 | current->mm->total_vm += pgsz; | 652 | mm->total_vm += pgsz; |
656 | current->mm->locked_vm += pgsz; | 653 | mm->locked_vm += pgsz; |
657 | 654 | ||
655 | error = 0; | ||
658 | out: | 656 | out: |
659 | up_write(&current->mm->mmap_sem); | 657 | up_write(&mm->mmap_sem); |
660 | return buffer; | 658 | return error; |
661 | } | 659 | } |
662 | 660 | ||
663 | void release_locked_buffer(void *buffer, size_t size) | 661 | void refund_locked_memory(struct mm_struct *mm, size_t size) |
664 | { | 662 | { |
665 | unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 663 | unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; |
666 | 664 | ||
667 | down_write(&current->mm->mmap_sem); | 665 | down_write(&mm->mmap_sem); |
668 | |||
669 | current->mm->total_vm -= pgsz; | ||
670 | current->mm->locked_vm -= pgsz; | ||
671 | |||
672 | up_write(&current->mm->mmap_sem); | | |
673 | } | ||
674 | 666 | ||
675 | void free_locked_buffer(void *buffer, size_t size) | 667 | mm->total_vm -= pgsz; |
676 | { | 668 | mm->locked_vm -= pgsz; |
677 | release_locked_buffer(buffer, size); | ||
678 | 669 | ||
679 | kfree(buffer); | 670 | up_write(&mm->mmap_sem); |
680 | } | 671 | } |