diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-10 22:53:40 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-10 22:53:40 -0400 |
commit | 862366118026a358882eefc70238dbcc3db37aac (patch) | |
tree | 4eb62bc10327a5afac064a95a091ea05ecd2acc1 /arch | |
parent | 57eee9ae7bbcfb692dc96c739a5184adb6349733 (diff) | |
parent | 511b01bdf64ad8a38414096eab283c7784aebfc4 (diff) |
Merge branch 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'tracing-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (244 commits)
Revert "x86, bts: reenable ptrace branch trace support"
tracing: do not translate event helper macros in print format
ftrace/documentation: fix typo in function grapher name
tracing/events: convert block trace points to TRACE_EVENT(), fix !CONFIG_BLOCK
tracing: add protection around module events unload
tracing: add trace_seq_vprint interface
tracing: fix the block trace points print size
tracing/events: convert block trace points to TRACE_EVENT()
ring-buffer: fix ret in rb_add_time_stamp
ring-buffer: pass in lockdep class key for reader_lock
tracing: add annotation to what type of stack trace is recorded
tracing: fix multiple use of __print_flags and __print_symbolic
tracing/events: fix output format of user stack
tracing/events: fix output format of kernel stack
tracing/trace_stack: fix the number of entries in the header
ring-buffer: discard timestamps that are at the start of the buffer
ring-buffer: try to discard unneeded timestamps
ring-buffer: fix bug in ring_buffer_discard_commit
ftrace: do not profile functions when disabled
tracing: make trace pipe recognize latency format flag
...
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/Kconfig.debug | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/ds.h | 82 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 35 | ||||
-rw-r--r-- | arch/x86/include/asm/ptrace.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 8 | ||||
-rw-r--r-- | arch/x86/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/x86/kernel/ds.c | 921 | ||||
-rw-r--r-- | arch/x86/kernel/ds_selftest.c | 408 | ||||
-rw-r--r-- | arch/x86/kernel/ds_selftest.h | 15 | ||||
-rw-r--r-- | arch/x86/kernel/entry_64.S | 19 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/ptrace.c | 284 | ||||
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/kmmio.c | 104 | ||||
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 2 |
17 files changed, 1444 insertions, 466 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 33fac6bbe1c2..d105f29bb6bb 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -174,6 +174,15 @@ config IOMMU_LEAK | |||
174 | Add a simple leak tracer to the IOMMU code. This is useful when you | 174 | Add a simple leak tracer to the IOMMU code. This is useful when you |
175 | are debugging a buggy device driver that leaks IOMMU mappings. | 175 | are debugging a buggy device driver that leaks IOMMU mappings. |
176 | 176 | ||
177 | config X86_DS_SELFTEST | ||
178 | bool "DS selftest" | ||
179 | default y | ||
180 | depends on DEBUG_KERNEL | ||
181 | depends on X86_DS | ||
182 | ---help--- | ||
183 | Perform Debug Store selftests at boot time. | ||
184 | If in doubt, say "N". | ||
185 | |||
177 | config HAVE_MMIOTRACE_SUPPORT | 186 | config HAVE_MMIOTRACE_SUPPORT |
178 | def_bool y | 187 | def_bool y |
179 | 188 | ||
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672ba100c..70dac199b093 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h | |||
@@ -15,8 +15,8 @@ | |||
15 | * - buffer allocation (memory accounting) | 15 | * - buffer allocation (memory accounting) |
16 | * | 16 | * |
17 | * | 17 | * |
18 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2009 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #ifndef _ASM_X86_DS_H | 22 | #ifndef _ASM_X86_DS_H |
@@ -83,8 +83,10 @@ enum ds_feature { | |||
83 | * The interrupt threshold is independent from the overflow callback | 83 | * The interrupt threshold is independent from the overflow callback |
84 | * to allow users to use their own overflow interrupt handling mechanism. | 84 | * to allow users to use their own overflow interrupt handling mechanism. |
85 | * | 85 | * |
86 | * task: the task to request recording for; | 86 | * The function might sleep. |
87 | * NULL for per-cpu recording on the current cpu | 87 | * |
88 | * task: the task to request recording for | ||
89 | * cpu: the cpu to request recording for | ||
88 | * base: the base pointer for the (non-pageable) buffer; | 90 | * base: the base pointer for the (non-pageable) buffer; |
89 | * size: the size of the provided buffer in bytes | 91 | * size: the size of the provided buffer in bytes |
90 | * ovfl: pointer to a function to be called on buffer overflow; | 92 | * ovfl: pointer to a function to be called on buffer overflow; |
@@ -93,19 +95,28 @@ enum ds_feature { | |||
93 | * -1 if no interrupt threshold is requested. | 95 | * -1 if no interrupt threshold is requested. |
94 | * flags: a bit-mask of the above flags | 96 | * flags: a bit-mask of the above flags |
95 | */ | 97 | */ |
96 | extern struct bts_tracer *ds_request_bts(struct task_struct *task, | 98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
97 | void *base, size_t size, | 99 | void *base, size_t size, |
98 | bts_ovfl_callback_t ovfl, | 100 | bts_ovfl_callback_t ovfl, |
99 | size_t th, unsigned int flags); | 101 | size_t th, unsigned int flags); |
100 | extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, |
101 | void *base, size_t size, | 103 | bts_ovfl_callback_t ovfl, |
102 | pebs_ovfl_callback_t ovfl, | 104 | size_t th, unsigned int flags); |
103 | size_t th, unsigned int flags); | 105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
106 | void *base, size_t size, | ||
107 | pebs_ovfl_callback_t ovfl, | ||
108 | size_t th, unsigned int flags); | ||
109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
110 | void *base, size_t size, | ||
111 | pebs_ovfl_callback_t ovfl, | ||
112 | size_t th, unsigned int flags); | ||
104 | 113 | ||
105 | /* | 114 | /* |
106 | * Release BTS or PEBS resources | 115 | * Release BTS or PEBS resources |
107 | * Suspend and resume BTS or PEBS tracing | 116 | * Suspend and resume BTS or PEBS tracing |
108 | * | 117 | * |
118 | * Must be called with irq's enabled. | ||
119 | * | ||
109 | * tracer: the tracer handle returned from ds_request_~() | 120 | * tracer: the tracer handle returned from ds_request_~() |
110 | */ | 121 | */ |
111 | extern void ds_release_bts(struct bts_tracer *tracer); | 122 | extern void ds_release_bts(struct bts_tracer *tracer); |
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); | |||
115 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | 126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); |
116 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | 127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); |
117 | 128 | ||
129 | /* | ||
130 | * Release BTS or PEBS resources | ||
131 | * Suspend and resume BTS or PEBS tracing | ||
132 | * | ||
133 | * Cpu tracers must call this on the traced cpu. | ||
134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
135 | * | ||
136 | * May be called with irq's disabled. | ||
137 | * | ||
138 | * Returns 0 if successful; | ||
139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
140 | * -EPERM if the task tracer does not trace itself. | ||
141 | * | ||
142 | * tracer: the tracer handle returned from ds_request_~() | ||
143 | */ | ||
144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
150 | |||
118 | 151 | ||
119 | /* | 152 | /* |
120 | * The raw DS buffer state as it is used for BTS and PEBS recording. | 153 | * The raw DS buffer state as it is used for BTS and PEBS recording. |
@@ -170,9 +203,9 @@ struct bts_struct { | |||
170 | } lbr; | 203 | } lbr; |
171 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | 204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ |
172 | struct { | 205 | struct { |
173 | __u64 jiffies; | 206 | __u64 clock; |
174 | pid_t pid; | 207 | pid_t pid; |
175 | } timestamp; | 208 | } event; |
176 | } variant; | 209 | } variant; |
177 | }; | 210 | }; |
178 | 211 | ||
@@ -201,8 +234,12 @@ struct bts_trace { | |||
201 | struct pebs_trace { | 234 | struct pebs_trace { |
202 | struct ds_trace ds; | 235 | struct ds_trace ds; |
203 | 236 | ||
204 | /* the PEBS reset value */ | 237 | /* the number of valid counters in the below array */ |
205 | unsigned long long reset_value; | 238 | unsigned int counters; |
239 | |||
240 | #define MAX_PEBS_COUNTERS 4 | ||
241 | /* the counter reset value */ | ||
242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
206 | }; | 243 | }; |
207 | 244 | ||
208 | 245 | ||
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); | |||
237 | * Returns 0 on success; -Eerrno on error | 274 | * Returns 0 on success; -Eerrno on error |
238 | * | 275 | * |
239 | * tracer: the tracer handle returned from ds_request_pebs() | 276 | * tracer: the tracer handle returned from ds_request_pebs() |
277 | * counter: the index of the counter | ||
240 | * value: the new counter reset value | 278 | * value: the new counter reset value |
241 | */ | 279 | */ |
242 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); | 280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, |
281 | unsigned int counter, u64 value); | ||
243 | 282 | ||
244 | /* | 283 | /* |
245 | * Initialization | 284 | * Initialization |
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | |||
252 | */ | 291 | */ |
253 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | 292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); |
254 | 293 | ||
255 | /* | ||
256 | * Task clone/init and cleanup work | ||
257 | */ | ||
258 | extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); | ||
259 | extern void ds_exit_thread(struct task_struct *tsk); | ||
260 | |||
261 | #else /* CONFIG_X86_DS */ | 294 | #else /* CONFIG_X86_DS */ |
262 | 295 | ||
263 | struct cpuinfo_x86; | 296 | struct cpuinfo_x86; |
264 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | 297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} |
265 | static inline void ds_switch_to(struct task_struct *prev, | 298 | static inline void ds_switch_to(struct task_struct *prev, |
266 | struct task_struct *next) {} | 299 | struct task_struct *next) {} |
267 | static inline void ds_copy_thread(struct task_struct *tsk, | ||
268 | struct task_struct *father) {} | ||
269 | static inline void ds_exit_thread(struct task_struct *tsk) {} | ||
270 | 300 | ||
271 | #endif /* CONFIG_X86_DS */ | 301 | #endif /* CONFIG_X86_DS */ |
272 | #endif /* _ASM_X86_DS_H */ | 302 | #endif /* _ASM_X86_DS_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 87ede2f31bc7..c7768269b1cf 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -462,14 +462,8 @@ struct thread_struct { | |||
462 | unsigned io_bitmap_max; | 462 | unsigned io_bitmap_max; |
463 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | 463 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ |
464 | unsigned long debugctlmsr; | 464 | unsigned long debugctlmsr; |
465 | #ifdef CONFIG_X86_DS | 465 | /* Debug Store context; see asm/ds.h */ |
466 | /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ | ||
467 | struct ds_context *ds_ctx; | 466 | struct ds_context *ds_ctx; |
468 | #endif /* CONFIG_X86_DS */ | ||
469 | #ifdef CONFIG_X86_PTRACE_BTS | ||
470 | /* the signal to send on a bts buffer overflow */ | ||
471 | unsigned int bts_ovfl_signal; | ||
472 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
473 | }; | 467 | }; |
474 | 468 | ||
475 | static inline unsigned long native_get_debugreg(int regno) | 469 | static inline unsigned long native_get_debugreg(int regno) |
@@ -797,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void) | |||
797 | return debugctlmsr; | 791 | return debugctlmsr; |
798 | } | 792 | } |
799 | 793 | ||
794 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
795 | { | ||
796 | u64 debugctlmsr = 0; | ||
797 | u32 val1, val2; | ||
798 | |||
799 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
800 | if (boot_cpu_data.x86 < 6) | ||
801 | return 0; | ||
802 | #endif | ||
803 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
804 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
805 | |||
806 | return debugctlmsr; | ||
807 | } | ||
808 | |||
800 | static inline void update_debugctlmsr(unsigned long debugctlmsr) | 809 | static inline void update_debugctlmsr(unsigned long debugctlmsr) |
801 | { | 810 | { |
802 | #ifndef CONFIG_X86_DEBUGCTLMSR | 811 | #ifndef CONFIG_X86_DEBUGCTLMSR |
@@ -806,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
806 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 815 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
807 | } | 816 | } |
808 | 817 | ||
818 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
819 | unsigned long debugctlmsr) | ||
820 | { | ||
821 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
822 | if (boot_cpu_data.x86 < 6) | ||
823 | return; | ||
824 | #endif | ||
825 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
826 | (u32)((u64)debugctlmsr), | ||
827 | (u32)((u64)debugctlmsr >> 32)); | ||
828 | } | ||
829 | |||
809 | /* | 830 | /* |
810 | * from system description table in BIOS. Mostly for MCA use, but | 831 | * from system description table in BIOS. Mostly for MCA use, but |
811 | * others may find it useful: | 832 | * others may find it useful: |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 624f133943ed..0f0d908349aa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
236 | extern int do_set_thread_area(struct task_struct *p, int idx, | 236 | extern int do_set_thread_area(struct task_struct *p, int idx, |
237 | struct user_desc __user *info, int can_allocate); | 237 | struct user_desc __user *info, int can_allocate); |
238 | 238 | ||
239 | extern void x86_ptrace_untrace(struct task_struct *); | 239 | #ifdef CONFIG_X86_PTRACE_BTS |
240 | extern void x86_ptrace_fork(struct task_struct *child, | 240 | extern void ptrace_bts_untrace(struct task_struct *tsk); |
241 | unsigned long clone_flags); | ||
242 | 241 | ||
243 | #define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) | 242 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) |
244 | #define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) | 243 | #endif /* CONFIG_X86_PTRACE_BTS */ |
245 | 244 | ||
246 | #endif /* __KERNEL__ */ | 245 | #endif /* __KERNEL__ */ |
247 | 246 | ||
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 16a5c84b0329..a5ecc9c33e92 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -17,7 +17,7 @@ | |||
17 | 17 | ||
18 | static inline void __native_flush_tlb(void) | 18 | static inline void __native_flush_tlb(void) |
19 | { | 19 | { |
20 | write_cr3(read_cr3()); | 20 | native_write_cr3(native_read_cr3()); |
21 | } | 21 | } |
22 | 22 | ||
23 | static inline void __native_flush_tlb_global(void) | 23 | static inline void __native_flush_tlb_global(void) |
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void) | |||
32 | */ | 32 | */ |
33 | raw_local_irq_save(flags); | 33 | raw_local_irq_save(flags); |
34 | 34 | ||
35 | cr4 = read_cr4(); | 35 | cr4 = native_read_cr4(); |
36 | /* clear PGE */ | 36 | /* clear PGE */ |
37 | write_cr4(cr4 & ~X86_CR4_PGE); | 37 | native_write_cr4(cr4 & ~X86_CR4_PGE); |
38 | /* write old PGE again and flush TLBs */ | 38 | /* write old PGE again and flush TLBs */ |
39 | write_cr4(cr4); | 39 | native_write_cr4(cr4); |
40 | 40 | ||
41 | raw_local_irq_restore(flags); | 41 | raw_local_irq_restore(flags); |
42 | } | 42 | } |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 235f5927bb97..4f78bd682125 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -44,6 +44,7 @@ obj-y += process.o | |||
44 | obj-y += i387.o xsave.o | 44 | obj-y += i387.o xsave.o |
45 | obj-y += ptrace.o | 45 | obj-y += ptrace.o |
46 | obj-$(CONFIG_X86_DS) += ds.o | 46 | obj-$(CONFIG_X86_DS) += ds.o |
47 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
47 | obj-$(CONFIG_X86_32) += tls.o | 48 | obj-$(CONFIG_X86_32) += tls.o |
48 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 49 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
49 | obj-y += step.o | 50 | obj-y += step.o |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 87b67e3a765a..48bfe1386038 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -19,45 +19,61 @@ | |||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | 22 | #include <linux/kernel.h> | |
23 | #include <asm/ds.h> | ||
24 | |||
25 | #include <linux/errno.h> | ||
26 | #include <linux/string.h> | 23 | #include <linux/string.h> |
27 | #include <linux/slab.h> | 24 | #include <linux/errno.h> |
28 | #include <linux/sched.h> | 25 | #include <linux/sched.h> |
26 | #include <linux/slab.h> | ||
29 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | 28 | #include <linux/trace_clock.h> |
29 | |||
30 | #include <asm/ds.h> | ||
31 | 31 | ||
32 | #include "ds_selftest.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * The configuration for a particular DS hardware implementation. | 35 | * The configuration for a particular DS hardware implementation: |
35 | */ | 36 | */ |
36 | struct ds_configuration { | 37 | struct ds_configuration { |
37 | /* the name of the configuration */ | 38 | /* The name of the configuration: */ |
38 | const char *name; | 39 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure and | 40 | |
40 | in the BTS and PEBS buffers in bytes; | 41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ |
41 | this covers the first 8 DS fields related to buffer management. */ | 42 | unsigned char sizeof_ptr_field; |
42 | unsigned char sizeof_field; | 43 | |
43 | /* the size of a BTS/PEBS record in bytes */ | 44 | /* The size of a BTS/PEBS record in bytes: */ |
44 | unsigned char sizeof_rec[2]; | 45 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | 46 | |
46 | * by enum ds_feature */ | 47 | /* The number of pebs counter reset values in the DS structure. */ |
47 | unsigned long ctl[dsf_ctl_max]; | 48 | unsigned char nr_counter_reset; |
49 | |||
50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
51 | unsigned long ctl[dsf_ctl_max]; | ||
48 | }; | 52 | }; |
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | 53 | static struct ds_configuration ds_cfg __read_mostly; |
54 | |||
55 | |||
56 | /* Maximal size of a DS configuration: */ | ||
57 | #define MAX_SIZEOF_DS 0x80 | ||
50 | 58 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | 59 | /* Maximal size of a BTS record: */ |
60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
52 | 61 | ||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | 62 | /* BTS and PEBS buffer alignment: */ |
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | 63 | #define DS_ALIGNMENT (1 << 3) |
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | 64 | ||
57 | #define BTS_CONTROL \ | 65 | /* Number of buffer pointers in DS: */ |
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | 66 | #define NUM_DS_PTR_FIELDS 8 |
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | 67 | ||
68 | /* Size of a pebs reset value in DS: */ | ||
69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
70 | |||
71 | /* Mask of control bits in the DS MSR register: */ | ||
72 | #define BTS_CONTROL \ | ||
73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
61 | 77 | ||
62 | /* | 78 | /* |
63 | * A BTS or PEBS tracer. | 79 | * A BTS or PEBS tracer. |
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | |||
66 | * to identify tracers. | 82 | * to identify tracers. |
67 | */ | 83 | */ |
68 | struct ds_tracer { | 84 | struct ds_tracer { |
69 | /* the DS context (partially) owned by this tracer */ | 85 | /* The DS context (partially) owned by this tracer. */ |
70 | struct ds_context *context; | 86 | struct ds_context *context; |
71 | /* the buffer provided on ds_request() and its size in bytes */ | 87 | /* The buffer provided on ds_request() and its size in bytes. */ |
72 | void *buffer; | 88 | void *buffer; |
73 | size_t size; | 89 | size_t size; |
74 | }; | 90 | }; |
75 | 91 | ||
76 | struct bts_tracer { | 92 | struct bts_tracer { |
77 | /* the common DS part */ | 93 | /* The common DS part: */ |
78 | struct ds_tracer ds; | 94 | struct ds_tracer ds; |
79 | /* the trace including the DS configuration */ | 95 | |
80 | struct bts_trace trace; | 96 | /* The trace including the DS configuration: */ |
81 | /* buffer overflow notification function */ | 97 | struct bts_trace trace; |
82 | bts_ovfl_callback_t ovfl; | 98 | |
99 | /* Buffer overflow notification function: */ | ||
100 | bts_ovfl_callback_t ovfl; | ||
101 | |||
102 | /* Active flags affecting trace collection. */ | ||
103 | unsigned int flags; | ||
83 | }; | 104 | }; |
84 | 105 | ||
85 | struct pebs_tracer { | 106 | struct pebs_tracer { |
86 | /* the common DS part */ | 107 | /* The common DS part: */ |
87 | struct ds_tracer ds; | 108 | struct ds_tracer ds; |
88 | /* the trace including the DS configuration */ | 109 | |
89 | struct pebs_trace trace; | 110 | /* The trace including the DS configuration: */ |
90 | /* buffer overflow notification function */ | 111 | struct pebs_trace trace; |
91 | pebs_ovfl_callback_t ovfl; | 112 | |
113 | /* Buffer overflow notification function: */ | ||
114 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | 115 | }; |
93 | 116 | ||
94 | /* | 117 | /* |
@@ -97,6 +120,7 @@ struct pebs_tracer { | |||
97 | * | 120 | * |
98 | * The DS configuration consists of the following fields; different | 121 | * The DS configuration consists of the following fields; different |
99 | * architetures vary in the size of those fields. | 122 | * architetures vary in the size of those fields. |
123 | * | ||
100 | * - double-word aligned base linear address of the BTS buffer | 124 | * - double-word aligned base linear address of the BTS buffer |
101 | * - write pointer into the BTS buffer | 125 | * - write pointer into the BTS buffer |
102 | * - end linear address of the BTS buffer (one byte beyond the end of | 126 | * - end linear address of the BTS buffer (one byte beyond the end of |
@@ -135,21 +159,22 @@ enum ds_field { | |||
135 | }; | 159 | }; |
136 | 160 | ||
137 | enum ds_qualifier { | 161 | enum ds_qualifier { |
138 | ds_bts = 0, | 162 | ds_bts = 0, |
139 | ds_pebs | 163 | ds_pebs |
140 | }; | 164 | }; |
141 | 165 | ||
142 | static inline unsigned long ds_get(const unsigned char *base, | 166 | static inline unsigned long |
143 | enum ds_qualifier qual, enum ds_field field) | 167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) |
144 | { | 168 | { |
145 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
146 | return *(unsigned long *)base; | 170 | return *(unsigned long *)base; |
147 | } | 171 | } |
148 | 172 | ||
149 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | 173 | static inline void |
150 | enum ds_field field, unsigned long value) | 174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, |
175 | unsigned long value) | ||
151 | { | 176 | { |
152 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
153 | (*(unsigned long *)base) = value; | 178 | (*(unsigned long *)base) = value; |
154 | } | 179 | } |
155 | 180 | ||
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
159 | */ | 184 | */ |
160 | static DEFINE_SPINLOCK(ds_lock); | 185 | static DEFINE_SPINLOCK(ds_lock); |
161 | 186 | ||
162 | |||
163 | /* | 187 | /* |
164 | * We either support (system-wide) per-cpu or per-thread allocation. | 188 | * We either support (system-wide) per-cpu or per-thread allocation. |
165 | * We distinguish the two based on the task_struct pointer, where a | 189 | * We distinguish the two based on the task_struct pointer, where a |
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock); | |||
178 | */ | 202 | */ |
179 | static atomic_t tracers = ATOMIC_INIT(0); | 203 | static atomic_t tracers = ATOMIC_INIT(0); |
180 | 204 | ||
181 | static inline void get_tracer(struct task_struct *task) | 205 | static inline int get_tracer(struct task_struct *task) |
182 | { | 206 | { |
183 | if (task) | 207 | int error; |
208 | |||
209 | spin_lock_irq(&ds_lock); | ||
210 | |||
211 | if (task) { | ||
212 | error = -EPERM; | ||
213 | if (atomic_read(&tracers) < 0) | ||
214 | goto out; | ||
184 | atomic_inc(&tracers); | 215 | atomic_inc(&tracers); |
185 | else | 216 | } else { |
217 | error = -EPERM; | ||
218 | if (atomic_read(&tracers) > 0) | ||
219 | goto out; | ||
186 | atomic_dec(&tracers); | 220 | atomic_dec(&tracers); |
221 | } | ||
222 | |||
223 | error = 0; | ||
224 | out: | ||
225 | spin_unlock_irq(&ds_lock); | ||
226 | return error; | ||
187 | } | 227 | } |
188 | 228 | ||
189 | static inline void put_tracer(struct task_struct *task) | 229 | static inline void put_tracer(struct task_struct *task) |
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task) | |||
194 | atomic_inc(&tracers); | 234 | atomic_inc(&tracers); |
195 | } | 235 | } |
196 | 236 | ||
197 | static inline int check_tracer(struct task_struct *task) | ||
198 | { | ||
199 | return task ? | ||
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
202 | } | ||
203 | |||
204 | |||
205 | /* | 237 | /* |
206 | * The DS context is either attached to a thread or to a cpu: | 238 | * The DS context is either attached to a thread or to a cpu: |
207 | * - in the former case, the thread_struct contains a pointer to the | 239 | * - in the former case, the thread_struct contains a pointer to the |
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task) | |||
213 | * deallocated when the last user puts the context. | 245 | * deallocated when the last user puts the context. |
214 | */ | 246 | */ |
215 | struct ds_context { | 247 | struct ds_context { |
216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | 248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ |
217 | unsigned char ds[MAX_SIZEOF_DS]; | 249 | unsigned char ds[MAX_SIZEOF_DS]; |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | 250 | |
219 | struct bts_tracer *bts_master; | 251 | /* The owner of the BTS and PEBS configuration, respectively: */ |
220 | struct pebs_tracer *pebs_master; | 252 | struct bts_tracer *bts_master; |
221 | /* use count */ | 253 | struct pebs_tracer *pebs_master; |
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
230 | 254 | ||
231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); | 255 | /* Use count: */ |
256 | unsigned long count; | ||
232 | 257 | ||
233 | #define system_context per_cpu(system_context_array, smp_processor_id()) | 258 | /* Pointer to the context pointer field: */ |
259 | struct ds_context **this; | ||
260 | |||
261 | /* The traced task; NULL for cpu tracing: */ | ||
262 | struct task_struct *task; | ||
263 | |||
264 | /* The traced cpu; only valid if task is NULL: */ | ||
265 | int cpu; | ||
266 | }; | ||
234 | 267 | ||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_context); | ||
235 | 269 | ||
236 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 270 | |
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
237 | { | 272 | { |
238 | struct ds_context **p_context = | 273 | struct ds_context **p_context = |
239 | (task ? &task->thread.ds_ctx : &system_context); | 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); |
240 | struct ds_context *context = NULL; | 275 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | 276 | struct ds_context *new_context = NULL; |
242 | unsigned long irq; | ||
243 | 277 | ||
244 | /* Chances are small that we already have a context. */ | 278 | /* Chances are small that we already have a context. */ |
245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | 279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
246 | if (!new_context) | 280 | if (!new_context) |
247 | return NULL; | 281 | return NULL; |
248 | 282 | ||
249 | spin_lock_irqsave(&ds_lock, irq); | 283 | spin_lock_irq(&ds_lock); |
250 | 284 | ||
251 | context = *p_context; | 285 | context = *p_context; |
252 | if (!context) { | 286 | if (likely(!context)) { |
253 | context = new_context; | 287 | context = new_context; |
254 | 288 | ||
255 | context->this = p_context; | 289 | context->this = p_context; |
256 | context->task = task; | 290 | context->task = task; |
291 | context->cpu = cpu; | ||
257 | context->count = 0; | 292 | context->count = 0; |
258 | 293 | ||
259 | if (task) | ||
260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
261 | |||
262 | if (!task || (task == current)) | ||
263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); | ||
264 | |||
265 | *p_context = context; | 294 | *p_context = context; |
266 | } | 295 | } |
267 | 296 | ||
268 | context->count++; | 297 | context->count++; |
269 | 298 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | 299 | spin_unlock_irq(&ds_lock); |
271 | 300 | ||
272 | if (context != new_context) | 301 | if (context != new_context) |
273 | kfree(new_context); | 302 | kfree(new_context); |
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) | |||
275 | return context; | 304 | return context; |
276 | } | 305 | } |
277 | 306 | ||
278 | static inline void ds_put_context(struct ds_context *context) | 307 | static void ds_put_context(struct ds_context *context) |
279 | { | 308 | { |
309 | struct task_struct *task; | ||
280 | unsigned long irq; | 310 | unsigned long irq; |
281 | 311 | ||
282 | if (!context) | 312 | if (!context) |
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context) | |||
291 | 321 | ||
292 | *(context->this) = NULL; | 322 | *(context->this) = NULL; |
293 | 323 | ||
294 | if (context->task) | 324 | task = context->task; |
295 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | 325 | |
326 | if (task) | ||
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
296 | 328 | ||
297 | if (!context->task || (context->task == current)) | 329 | /* |
298 | wrmsrl(MSR_IA32_DS_AREA, 0); | 330 | * We leave the (now dangling) pointer to the DS configuration in |
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
332 | * NULL - the hardware would crash if we enabled tracing. | ||
333 | * | ||
334 | * This saves us some problems with having to write an msr on a | ||
335 | * different cpu while preventing others from doing the same for the | ||
336 | * next context for that same cpu. | ||
337 | */ | ||
299 | 338 | ||
300 | spin_unlock_irqrestore(&ds_lock, irq); | 339 | spin_unlock_irqrestore(&ds_lock, irq); |
301 | 340 | ||
341 | /* The context might still be in use for context switching. */ | ||
342 | if (task && (task != current)) | ||
343 | wait_task_context_switch(task); | ||
344 | |||
302 | kfree(context); | 345 | kfree(context); |
303 | } | 346 | } |
304 | 347 | ||
348 | static void ds_install_ds_area(struct ds_context *context) | ||
349 | { | ||
350 | unsigned long ds; | ||
351 | |||
352 | ds = (unsigned long)context->ds; | ||
353 | |||
354 | /* | ||
355 | * There is a race between the bts master and the pebs master. | ||
356 | * | ||
357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
359 | * | ||
360 | * If bts and pebs are collected for the same task or same cpu, | ||
361 | * the same configuration is written twice. | ||
362 | */ | ||
363 | if (context->task) { | ||
364 | get_cpu(); | ||
365 | if (context->task == current) | ||
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
368 | put_cpu(); | ||
369 | } else | ||
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
372 | } | ||
305 | 373 | ||
306 | /* | 374 | /* |
307 | * Call the tracer's callback on a buffer overflow. | 375 | * Call the tracer's callback on a buffer overflow. |
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | |||
332 | * The remainder of any partially written record is zeroed out. | 400 | * The remainder of any partially written record is zeroed out. |
333 | * | 401 | * |
334 | * context: the DS context | 402 | * context: the DS context |
335 | * qual: the buffer type | 403 | * qual: the buffer type |
336 | * record: the data to write | 404 | * record: the data to write |
337 | * size: the size of the data | 405 | * size: the size of the data |
338 | */ | 406 | */ |
339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | 407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | 408 | const void *record, size_t size) |
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
349 | unsigned long write_size, adj_write_size; | 417 | unsigned long write_size, adj_write_size; |
350 | 418 | ||
351 | /* | 419 | /* |
352 | * write as much as possible without producing an | 420 | * Write as much as possible without producing an |
353 | * overflow interrupt. | 421 | * overflow interrupt. |
354 | * | 422 | * |
355 | * interrupt_threshold must either be | 423 | * Interrupt_threshold must either be |
356 | * - bigger than absolute_maximum or | 424 | * - bigger than absolute_maximum or |
357 | * - point to a record between buffer_base and absolute_maximum | 425 | * - point to a record between buffer_base and absolute_maximum |
358 | * | 426 | * |
359 | * index points to a valid record. | 427 | * Index points to a valid record. |
360 | */ | 428 | */ |
361 | base = ds_get(context->ds, qual, ds_buffer_base); | 429 | base = ds_get(context->ds, qual, ds_buffer_base); |
362 | index = ds_get(context->ds, qual, ds_index); | 430 | index = ds_get(context->ds, qual, ds_index); |
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
365 | 433 | ||
366 | write_end = min(end, int_th); | 434 | write_end = min(end, int_th); |
367 | 435 | ||
368 | /* if we are already beyond the interrupt threshold, | 436 | /* |
369 | * we fill the entire buffer */ | 437 | * If we are already beyond the interrupt threshold, |
438 | * we fill the entire buffer. | ||
439 | */ | ||
370 | if (write_end <= index) | 440 | if (write_end <= index) |
371 | write_end = end; | 441 | write_end = end; |
372 | 442 | ||
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; |
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; |
385 | 455 | ||
386 | /* zero out trailing bytes */ | 456 | /* Zero out trailing bytes. */ |
387 | memset((char *)index + write_size, 0, | 457 | memset((char *)index + write_size, 0, |
388 | adj_write_size - write_size); | 458 | adj_write_size - write_size); |
389 | index += adj_write_size; | 459 | index += adj_write_size; |
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | 480 | * Later architectures use 64bit pointers throughout, whereas earlier |
411 | * architectures use 32bit pointers in 32bit mode. | 481 | * architectures use 32bit pointers in 32bit mode. |
412 | * | 482 | * |
413 | * We compute the base address for the first 8 fields based on: | 483 | * We compute the base address for the fields based on: |
414 | * - the field size stored in the DS configuration | 484 | * - the field size stored in the DS configuration |
415 | * - the relative field position | 485 | * - the relative field position |
416 | * | 486 | * |
@@ -431,23 +501,23 @@ enum bts_field { | |||
431 | bts_to, | 501 | bts_to, |
432 | bts_flags, | 502 | bts_flags, |
433 | 503 | ||
434 | bts_qual = bts_from, | 504 | bts_qual = bts_from, |
435 | bts_jiffies = bts_to, | 505 | bts_clock = bts_to, |
436 | bts_pid = bts_flags, | 506 | bts_pid = bts_flags, |
437 | 507 | ||
438 | bts_qual_mask = (bts_qual_max - 1), | 508 | bts_qual_mask = (bts_qual_max - 1), |
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
440 | }; | 510 | }; |
441 | 511 | ||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | 512 | static inline unsigned long bts_get(const char *base, enum bts_field field) |
443 | { | 513 | { |
444 | base += (ds_cfg.sizeof_field * field); | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
445 | return *(unsigned long *)base; | 515 | return *(unsigned long *)base; |
446 | } | 516 | } |
447 | 517 | ||
448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | 518 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
449 | { | 519 | { |
450 | base += (ds_cfg.sizeof_field * field);; | 520 | base += (ds_cfg.sizeof_ptr_field * field);; |
451 | (*(unsigned long *)base) = val; | 521 | (*(unsigned long *)base) = val; |
452 | } | 522 | } |
453 | 523 | ||
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) | |||
463 | * | 533 | * |
464 | * return: bytes read/written on success; -Eerrno, otherwise | 534 | * return: bytes read/written on success; -Eerrno, otherwise |
465 | */ | 535 | */ |
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | 536 | static int |
467 | struct bts_struct *out) | 537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) |
468 | { | 538 | { |
469 | if (!tracer) | 539 | if (!tracer) |
470 | return -EINVAL; | 540 | return -EINVAL; |
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at, | |||
478 | memset(out, 0, sizeof(*out)); | 548 | memset(out, 0, sizeof(*out)); |
479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | 549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | 550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | 551 | out->variant.event.clock = bts_get(at, bts_clock); |
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | 552 | out->variant.event.pid = bts_get(at, bts_pid); |
483 | } else { | 553 | } else { |
484 | out->qualifier = bts_branch; | 554 | out->qualifier = bts_branch; |
485 | out->variant.lbr.from = bts_get(at, bts_from); | 555 | out->variant.lbr.from = bts_get(at, bts_from); |
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |||
516 | case bts_task_arrives: | 586 | case bts_task_arrives: |
517 | case bts_task_departs: | 587 | case bts_task_departs: |
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | 588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); |
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | 589 | bts_set(raw, bts_clock, in->variant.event.clock); |
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | 590 | bts_set(raw, bts_pid, in->variant.event.pid); |
521 | break; | 591 | break; |
522 | default: | 592 | default: |
523 | return -EINVAL; | 593 | return -EINVAL; |
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
555 | unsigned int flags) { | 625 | unsigned int flags) { |
556 | unsigned long buffer, adj; | 626 | unsigned long buffer, adj; |
557 | 627 | ||
558 | /* adjust the buffer address and size to meet alignment | 628 | /* |
629 | * Adjust the buffer address and size to meet alignment | ||
559 | * constraints: | 630 | * constraints: |
560 | * - buffer is double-word aligned | 631 | * - buffer is double-word aligned |
561 | * - size is multiple of record size | 632 | * - size is multiple of record size |
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
577 | trace->begin = (void *)buffer; | 648 | trace->begin = (void *)buffer; |
578 | trace->top = trace->begin; | 649 | trace->top = trace->begin; |
579 | trace->end = (void *)(buffer + size); | 650 | trace->end = (void *)(buffer + size); |
580 | /* The value for 'no threshold' is -1, which will set the | 651 | /* |
652 | * The value for 'no threshold' is -1, which will set the | ||
581 | * threshold outside of the buffer, just like we want it. | 653 | * threshold outside of the buffer, just like we want it. |
582 | */ | 654 | */ |
655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
583 | trace->ith = (void *)(buffer + size - ith); | 656 | trace->ith = (void *)(buffer + size - ith); |
584 | 657 | ||
585 | trace->flags = flags; | 658 | trace->flags = flags; |
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
588 | 661 | ||
589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | 662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
590 | enum ds_qualifier qual, struct task_struct *task, | 663 | enum ds_qualifier qual, struct task_struct *task, |
591 | void *base, size_t size, size_t th, unsigned int flags) | 664 | int cpu, void *base, size_t size, size_t th) |
592 | { | 665 | { |
593 | struct ds_context *context; | 666 | struct ds_context *context; |
594 | int error; | 667 | int error; |
668 | size_t req_size; | ||
669 | |||
670 | error = -EOPNOTSUPP; | ||
671 | if (!ds_cfg.sizeof_rec[qual]) | ||
672 | goto out; | ||
595 | 673 | ||
596 | error = -EINVAL; | 674 | error = -EINVAL; |
597 | if (!base) | 675 | if (!base) |
598 | goto out; | 676 | goto out; |
599 | 677 | ||
600 | /* we require some space to do alignment adjustments below */ | 678 | req_size = ds_cfg.sizeof_rec[qual]; |
679 | /* We might need space for alignment adjustments. */ | ||
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
681 | req_size += DS_ALIGNMENT; | ||
682 | |||
601 | error = -EINVAL; | 683 | error = -EINVAL; |
602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | 684 | if (size < req_size) |
603 | goto out; | 685 | goto out; |
604 | 686 | ||
605 | if (th != (size_t)-1) { | 687 | if (th != (size_t)-1) { |
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |||
614 | tracer->size = size; | 696 | tracer->size = size; |
615 | 697 | ||
616 | error = -ENOMEM; | 698 | error = -ENOMEM; |
617 | context = ds_get_context(task); | 699 | context = ds_get_context(task, cpu); |
618 | if (!context) | 700 | if (!context) |
619 | goto out; | 701 | goto out; |
620 | tracer->context = context; | 702 | tracer->context = context; |
621 | 703 | ||
622 | ds_init_ds_trace(trace, qual, base, size, th, flags); | 704 | /* |
705 | * Defer any tracer-specific initialization work for the context until | ||
706 | * context ownership has been clarified. | ||
707 | */ | ||
623 | 708 | ||
624 | error = 0; | 709 | error = 0; |
625 | out: | 710 | out: |
626 | return error; | 711 | return error; |
627 | } | 712 | } |
628 | 713 | ||
629 | struct bts_tracer *ds_request_bts(struct task_struct *task, | 714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, |
630 | void *base, size_t size, | 715 | void *base, size_t size, |
631 | bts_ovfl_callback_t ovfl, size_t th, | 716 | bts_ovfl_callback_t ovfl, size_t th, |
632 | unsigned int flags) | 717 | unsigned int flags) |
633 | { | 718 | { |
634 | struct bts_tracer *tracer; | 719 | struct bts_tracer *tracer; |
635 | unsigned long irq; | ||
636 | int error; | 720 | int error; |
637 | 721 | ||
722 | /* Buffer overflow notification is not yet implemented. */ | ||
638 | error = -EOPNOTSUPP; | 723 | error = -EOPNOTSUPP; |
639 | if (!ds_cfg.ctl[dsf_bts]) | 724 | if (ovfl) |
640 | goto out; | 725 | goto out; |
641 | 726 | ||
642 | /* buffer overflow notification is not yet implemented */ | 727 | error = get_tracer(task); |
643 | error = -EOPNOTSUPP; | 728 | if (error < 0) |
644 | if (ovfl) | ||
645 | goto out; | 729 | goto out; |
646 | 730 | ||
647 | error = -ENOMEM; | 731 | error = -ENOMEM; |
648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
649 | if (!tracer) | 733 | if (!tracer) |
650 | goto out; | 734 | goto out_put_tracer; |
651 | tracer->ovfl = ovfl; | 735 | tracer->ovfl = ovfl; |
652 | 736 | ||
737 | /* Do some more error checking and acquire a tracing context. */ | ||
653 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
654 | ds_bts, task, base, size, th, flags); | 739 | ds_bts, task, cpu, base, size, th); |
655 | if (error < 0) | 740 | if (error < 0) |
656 | goto out_tracer; | 741 | goto out_tracer; |
657 | 742 | ||
658 | 743 | /* Claim the bts part of the tracing context we acquired above. */ | |
659 | spin_lock_irqsave(&ds_lock, irq); | 744 | spin_lock_irq(&ds_lock); |
660 | |||
661 | error = -EPERM; | ||
662 | if (!check_tracer(task)) | ||
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
665 | 745 | ||
666 | error = -EPERM; | 746 | error = -EPERM; |
667 | if (tracer->ds.context->bts_master) | 747 | if (tracer->ds.context->bts_master) |
668 | goto out_put_tracer; | 748 | goto out_unlock; |
669 | tracer->ds.context->bts_master = tracer; | 749 | tracer->ds.context->bts_master = tracer; |
670 | 750 | ||
671 | spin_unlock_irqrestore(&ds_lock, irq); | 751 | spin_unlock_irq(&ds_lock); |
672 | 752 | ||
753 | /* | ||
754 | * Now that we own the bts part of the context, let's complete the | ||
755 | * initialization for that part. | ||
756 | */ | ||
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
759 | ds_install_ds_area(tracer->ds.context); | ||
673 | 760 | ||
674 | tracer->trace.read = bts_read; | 761 | tracer->trace.read = bts_read; |
675 | tracer->trace.write = bts_write; | 762 | tracer->trace.write = bts_write; |
676 | 763 | ||
677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | 764 | /* Start tracing. */ |
678 | ds_resume_bts(tracer); | 765 | ds_resume_bts(tracer); |
679 | 766 | ||
680 | return tracer; | 767 | return tracer; |
681 | 768 | ||
682 | out_put_tracer: | ||
683 | put_tracer(task); | ||
684 | out_unlock: | 769 | out_unlock: |
685 | spin_unlock_irqrestore(&ds_lock, irq); | 770 | spin_unlock_irq(&ds_lock); |
686 | ds_put_context(tracer->ds.context); | 771 | ds_put_context(tracer->ds.context); |
687 | out_tracer: | 772 | out_tracer: |
688 | kfree(tracer); | 773 | kfree(tracer); |
774 | out_put_tracer: | ||
775 | put_tracer(task); | ||
689 | out: | 776 | out: |
690 | return ERR_PTR(error); | 777 | return ERR_PTR(error); |
691 | } | 778 | } |
692 | 779 | ||
693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
694 | void *base, size_t size, | 781 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | 782 | bts_ovfl_callback_t ovfl, |
696 | unsigned int flags) | 783 | size_t th, unsigned int flags) |
784 | { | ||
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
786 | } | ||
787 | |||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
789 | bts_ovfl_callback_t ovfl, | ||
790 | size_t th, unsigned int flags) | ||
791 | { | ||
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
793 | } | ||
794 | |||
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
796 | void *base, size_t size, | ||
797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
798 | unsigned int flags) | ||
697 | { | 799 | { |
698 | struct pebs_tracer *tracer; | 800 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
700 | int error; | 801 | int error; |
701 | 802 | ||
702 | /* buffer overflow notification is not yet implemented */ | 803 | /* Buffer overflow notification is not yet implemented. */ |
703 | error = -EOPNOTSUPP; | 804 | error = -EOPNOTSUPP; |
704 | if (ovfl) | 805 | if (ovfl) |
705 | goto out; | 806 | goto out; |
706 | 807 | ||
808 | error = get_tracer(task); | ||
809 | if (error < 0) | ||
810 | goto out; | ||
811 | |||
707 | error = -ENOMEM; | 812 | error = -ENOMEM; |
708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
709 | if (!tracer) | 814 | if (!tracer) |
710 | goto out; | 815 | goto out_put_tracer; |
711 | tracer->ovfl = ovfl; | 816 | tracer->ovfl = ovfl; |
712 | 817 | ||
818 | /* Do some more error checking and acquire a tracing context. */ | ||
713 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
714 | ds_pebs, task, base, size, th, flags); | 820 | ds_pebs, task, cpu, base, size, th); |
715 | if (error < 0) | 821 | if (error < 0) |
716 | goto out_tracer; | 822 | goto out_tracer; |
717 | 823 | ||
718 | spin_lock_irqsave(&ds_lock, irq); | 824 | /* Claim the pebs part of the tracing context we acquired above. */ |
719 | 825 | spin_lock_irq(&ds_lock); | |
720 | error = -EPERM; | ||
721 | if (!check_tracer(task)) | ||
722 | goto out_unlock; | ||
723 | get_tracer(task); | ||
724 | 826 | ||
725 | error = -EPERM; | 827 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | 828 | if (tracer->ds.context->pebs_master) |
727 | goto out_put_tracer; | 829 | goto out_unlock; |
728 | tracer->ds.context->pebs_master = tracer; | 830 | tracer->ds.context->pebs_master = tracer; |
729 | 831 | ||
730 | spin_unlock_irqrestore(&ds_lock, irq); | 832 | spin_unlock_irq(&ds_lock); |
731 | 833 | ||
834 | /* | ||
835 | * Now that we own the pebs part of the context, let's complete the | ||
836 | * initialization for that part. | ||
837 | */ | ||
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
840 | ds_install_ds_area(tracer->ds.context); | ||
841 | |||
842 | /* Start tracing. */ | ||
733 | ds_resume_pebs(tracer); | 843 | ds_resume_pebs(tracer); |
734 | 844 | ||
735 | return tracer; | 845 | return tracer; |
736 | 846 | ||
737 | out_put_tracer: | ||
738 | put_tracer(task); | ||
739 | out_unlock: | 847 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | 848 | spin_unlock_irq(&ds_lock); |
741 | ds_put_context(tracer->ds.context); | 849 | ds_put_context(tracer->ds.context); |
742 | out_tracer: | 850 | out_tracer: |
743 | kfree(tracer); | 851 | kfree(tracer); |
852 | out_put_tracer: | ||
853 | put_tracer(task); | ||
744 | out: | 854 | out: |
745 | return ERR_PTR(error); | 855 | return ERR_PTR(error); |
746 | } | 856 | } |
747 | 857 | ||
748 | void ds_release_bts(struct bts_tracer *tracer) | 858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
859 | void *base, size_t size, | ||
860 | pebs_ovfl_callback_t ovfl, | ||
861 | size_t th, unsigned int flags) | ||
749 | { | 862 | { |
750 | if (!tracer) | 863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); |
751 | return; | 864 | } |
752 | 865 | ||
753 | ds_suspend_bts(tracer); | 866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, |
867 | pebs_ovfl_callback_t ovfl, | ||
868 | size_t th, unsigned int flags) | ||
869 | { | ||
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
871 | } | ||
872 | |||
873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
874 | { | ||
875 | struct task_struct *task; | ||
876 | |||
877 | task = tracer->ds.context->task; | ||
754 | 878 | ||
755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | 879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
756 | tracer->ds.context->bts_master = NULL; | 880 | tracer->ds.context->bts_master = NULL; |
757 | 881 | ||
758 | put_tracer(tracer->ds.context->task); | 882 | /* Make sure tracing stopped and the tracer is not in use. */ |
883 | if (task && (task != current)) | ||
884 | wait_task_context_switch(task); | ||
885 | |||
759 | ds_put_context(tracer->ds.context); | 886 | ds_put_context(tracer->ds.context); |
887 | put_tracer(task); | ||
760 | 888 | ||
761 | kfree(tracer); | 889 | kfree(tracer); |
762 | } | 890 | } |
763 | 891 | ||
892 | void ds_release_bts(struct bts_tracer *tracer) | ||
893 | { | ||
894 | might_sleep(); | ||
895 | |||
896 | if (!tracer) | ||
897 | return; | ||
898 | |||
899 | ds_suspend_bts(tracer); | ||
900 | ds_free_bts(tracer); | ||
901 | } | ||
902 | |||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
904 | { | ||
905 | struct task_struct *task; | ||
906 | unsigned long irq; | ||
907 | int error; | ||
908 | |||
909 | if (!tracer) | ||
910 | return 0; | ||
911 | |||
912 | task = tracer->ds.context->task; | ||
913 | |||
914 | local_irq_save(irq); | ||
915 | |||
916 | error = -EPERM; | ||
917 | if (!task && | ||
918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
919 | goto out; | ||
920 | |||
921 | error = -EPERM; | ||
922 | if (task && (task != current)) | ||
923 | goto out; | ||
924 | |||
925 | ds_suspend_bts_noirq(tracer); | ||
926 | ds_free_bts(tracer); | ||
927 | |||
928 | error = 0; | ||
929 | out: | ||
930 | local_irq_restore(irq); | ||
931 | return error; | ||
932 | } | ||
933 | |||
934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
935 | unsigned long debugctlmsr) | ||
936 | { | ||
937 | task->thread.debugctlmsr = debugctlmsr; | ||
938 | |||
939 | get_cpu(); | ||
940 | if (task == current) | ||
941 | update_debugctlmsr(debugctlmsr); | ||
942 | put_cpu(); | ||
943 | } | ||
944 | |||
764 | void ds_suspend_bts(struct bts_tracer *tracer) | 945 | void ds_suspend_bts(struct bts_tracer *tracer) |
765 | { | 946 | { |
766 | struct task_struct *task; | 947 | struct task_struct *task; |
948 | unsigned long debugctlmsr; | ||
949 | int cpu; | ||
767 | 950 | ||
768 | if (!tracer) | 951 | if (!tracer) |
769 | return; | 952 | return; |
770 | 953 | ||
954 | tracer->flags = 0; | ||
955 | |||
771 | task = tracer->ds.context->task; | 956 | task = tracer->ds.context->task; |
957 | cpu = tracer->ds.context->cpu; | ||
772 | 958 | ||
773 | if (!task || (task == current)) | 959 | WARN_ON(!task && irqs_disabled()); |
774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); | ||
775 | 960 | ||
776 | if (task) { | 961 | debugctlmsr = (task ? |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | 962 | task->thread.debugctlmsr : |
963 | get_debugctlmsr_on_cpu(cpu)); | ||
964 | debugctlmsr &= ~BTS_CONTROL; | ||
778 | 965 | ||
779 | if (!task->thread.debugctlmsr) | 966 | if (task) |
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | 967 | update_task_debugctlmsr(task, debugctlmsr); |
781 | } | 968 | else |
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
782 | } | 970 | } |
783 | 971 | ||
784 | void ds_resume_bts(struct bts_tracer *tracer) | 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) |
785 | { | 973 | { |
786 | struct task_struct *task; | 974 | struct task_struct *task; |
787 | unsigned long control; | 975 | unsigned long debugctlmsr, irq; |
976 | int cpu, error = 0; | ||
788 | 977 | ||
789 | if (!tracer) | 978 | if (!tracer) |
790 | return; | 979 | return 0; |
980 | |||
981 | tracer->flags = 0; | ||
791 | 982 | ||
792 | task = tracer->ds.context->task; | 983 | task = tracer->ds.context->task; |
984 | cpu = tracer->ds.context->cpu; | ||
985 | |||
986 | local_irq_save(irq); | ||
987 | |||
988 | error = -EPERM; | ||
989 | if (!task && (cpu != smp_processor_id())) | ||
990 | goto out; | ||
991 | |||
992 | debugctlmsr = (task ? | ||
993 | task->thread.debugctlmsr : | ||
994 | get_debugctlmsr()); | ||
995 | debugctlmsr &= ~BTS_CONTROL; | ||
996 | |||
997 | if (task) | ||
998 | update_task_debugctlmsr(task, debugctlmsr); | ||
999 | else | ||
1000 | update_debugctlmsr(debugctlmsr); | ||
1001 | |||
1002 | error = 0; | ||
1003 | out: | ||
1004 | local_irq_restore(irq); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
1009 | { | ||
1010 | unsigned long control; | ||
793 | 1011 | ||
794 | control = ds_cfg.ctl[dsf_bts]; | 1012 | control = ds_cfg.ctl[dsf_bts]; |
795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | 1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer) | |||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | 1015 | if (!(tracer->trace.ds.flags & BTS_USER)) |
798 | control |= ds_cfg.ctl[dsf_bts_user]; | 1016 | control |= ds_cfg.ctl[dsf_bts_user]; |
799 | 1017 | ||
800 | if (task) { | 1018 | return control; |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
804 | |||
805 | if (!task || (task == current)) | ||
806 | update_debugctlmsr(get_debugctlmsr() | control); | ||
807 | } | 1019 | } |
808 | 1020 | ||
809 | void ds_release_pebs(struct pebs_tracer *tracer) | 1021 | void ds_resume_bts(struct bts_tracer *tracer) |
810 | { | 1022 | { |
1023 | struct task_struct *task; | ||
1024 | unsigned long debugctlmsr; | ||
1025 | int cpu; | ||
1026 | |||
811 | if (!tracer) | 1027 | if (!tracer) |
812 | return; | 1028 | return; |
813 | 1029 | ||
814 | ds_suspend_pebs(tracer); | 1030 | tracer->flags = tracer->trace.ds.flags; |
1031 | |||
1032 | task = tracer->ds.context->task; | ||
1033 | cpu = tracer->ds.context->cpu; | ||
1034 | |||
1035 | WARN_ON(!task && irqs_disabled()); | ||
1036 | |||
1037 | debugctlmsr = (task ? | ||
1038 | task->thread.debugctlmsr : | ||
1039 | get_debugctlmsr_on_cpu(cpu)); | ||
1040 | debugctlmsr |= ds_bts_control(tracer); | ||
1041 | |||
1042 | if (task) | ||
1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
1044 | else | ||
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
1046 | } | ||
1047 | |||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
1049 | { | ||
1050 | struct task_struct *task; | ||
1051 | unsigned long debugctlmsr, irq; | ||
1052 | int cpu, error = 0; | ||
1053 | |||
1054 | if (!tracer) | ||
1055 | return 0; | ||
1056 | |||
1057 | tracer->flags = tracer->trace.ds.flags; | ||
1058 | |||
1059 | task = tracer->ds.context->task; | ||
1060 | cpu = tracer->ds.context->cpu; | ||
1061 | |||
1062 | local_irq_save(irq); | ||
1063 | |||
1064 | error = -EPERM; | ||
1065 | if (!task && (cpu != smp_processor_id())) | ||
1066 | goto out; | ||
1067 | |||
1068 | debugctlmsr = (task ? | ||
1069 | task->thread.debugctlmsr : | ||
1070 | get_debugctlmsr()); | ||
1071 | debugctlmsr |= ds_bts_control(tracer); | ||
1072 | |||
1073 | if (task) | ||
1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
1075 | else | ||
1076 | update_debugctlmsr(debugctlmsr); | ||
1077 | |||
1078 | error = 0; | ||
1079 | out: | ||
1080 | local_irq_restore(irq); | ||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
1085 | { | ||
1086 | struct task_struct *task; | ||
1087 | |||
1088 | task = tracer->ds.context->task; | ||
815 | 1089 | ||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | 1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
817 | tracer->ds.context->pebs_master = NULL; | 1091 | tracer->ds.context->pebs_master = NULL; |
818 | 1092 | ||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | 1093 | ds_put_context(tracer->ds.context); |
1094 | put_tracer(task); | ||
821 | 1095 | ||
822 | kfree(tracer); | 1096 | kfree(tracer); |
823 | } | 1097 | } |
824 | 1098 | ||
1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
1100 | { | ||
1101 | might_sleep(); | ||
1102 | |||
1103 | if (!tracer) | ||
1104 | return; | ||
1105 | |||
1106 | ds_suspend_pebs(tracer); | ||
1107 | ds_free_pebs(tracer); | ||
1108 | } | ||
1109 | |||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
1111 | { | ||
1112 | struct task_struct *task; | ||
1113 | unsigned long irq; | ||
1114 | int error; | ||
1115 | |||
1116 | if (!tracer) | ||
1117 | return 0; | ||
1118 | |||
1119 | task = tracer->ds.context->task; | ||
1120 | |||
1121 | local_irq_save(irq); | ||
1122 | |||
1123 | error = -EPERM; | ||
1124 | if (!task && | ||
1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
1126 | goto out; | ||
1127 | |||
1128 | error = -EPERM; | ||
1129 | if (task && (task != current)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ds_suspend_pebs_noirq(tracer); | ||
1133 | ds_free_pebs(tracer); | ||
1134 | |||
1135 | error = 0; | ||
1136 | out: | ||
1137 | local_irq_restore(irq); | ||
1138 | return error; | ||
1139 | } | ||
1140 | |||
825 | void ds_suspend_pebs(struct pebs_tracer *tracer) | 1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
826 | { | 1142 | { |
827 | 1143 | ||
828 | } | 1144 | } |
829 | 1145 | ||
1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
1147 | { | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
830 | void ds_resume_pebs(struct pebs_tracer *tracer) | 1151 | void ds_resume_pebs(struct pebs_tracer *tracer) |
831 | { | 1152 | { |
832 | 1153 | ||
833 | } | 1154 | } |
834 | 1155 | ||
1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
1157 | { | ||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
836 | { | 1162 | { |
837 | if (!tracer) | 1163 | if (!tracer) |
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | |||
847 | return NULL; | 1173 | return NULL; |
848 | 1174 | ||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
850 | tracer->trace.reset_value = | 1176 | |
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | 1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; |
1178 | memcpy(tracer->trace.counter_reset, | ||
1179 | tracer->ds.context->ds + | ||
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
852 | 1182 | ||
853 | return &tracer->trace; | 1183 | return &tracer->trace; |
854 | } | 1184 | } |
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer) | |||
873 | 1203 | ||
874 | tracer->trace.ds.top = tracer->trace.ds.begin; | 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; |
875 | 1205 | ||
876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | 1207 | (unsigned long)tracer->trace.ds.top); |
878 | 1208 | ||
879 | return 0; | 1209 | return 0; |
880 | } | 1210 | } |
881 | 1211 | ||
882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) | 1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, |
1213 | unsigned int counter, u64 value) | ||
883 | { | 1214 | { |
884 | if (!tracer) | 1215 | if (!tracer) |
885 | return -EINVAL; | 1216 | return -EINVAL; |
886 | 1217 | ||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | 1218 | if (ds_cfg.nr_counter_reset < counter) |
1219 | return -EINVAL; | ||
1220 | |||
1221 | *(u64 *)(tracer->ds.context->ds + | ||
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
888 | 1224 | ||
889 | return 0; | 1225 | return 0; |
890 | } | 1226 | } |
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = { | |||
894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | 1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
895 | .ctl[dsf_bts_kernel] = (1 << 5), | 1231 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | 1232 | .ctl[dsf_bts_user] = (1 << 6), |
897 | 1233 | .nr_counter_reset = 1, | |
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
900 | #ifdef __i386__ | ||
901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
902 | #else | ||
903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
904 | #endif | ||
905 | }; | 1234 | }; |
906 | static const struct ds_configuration ds_cfg_pentium_m = { | 1235 | static const struct ds_configuration ds_cfg_pentium_m = { |
907 | .name = "Pentium M", | 1236 | .name = "Pentium M", |
908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
909 | 1238 | .nr_counter_reset = 1, | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
912 | #ifdef __i386__ | ||
913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
914 | #else | ||
915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
916 | #endif | ||
917 | }; | 1239 | }; |
918 | static const struct ds_configuration ds_cfg_core2_atom = { | 1240 | static const struct ds_configuration ds_cfg_core2_atom = { |
919 | .name = "Core 2/Atom", | 1241 | .name = "Core 2/Atom", |
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
921 | .ctl[dsf_bts_kernel] = (1 << 9), | 1243 | .ctl[dsf_bts_kernel] = (1 << 9), |
922 | .ctl[dsf_bts_user] = (1 << 10), | 1244 | .ctl[dsf_bts_user] = (1 << 10), |
923 | 1245 | .nr_counter_reset = 1, | |
924 | .sizeof_field = 8, | 1246 | }; |
925 | .sizeof_rec[ds_bts] = 8 * 3, | 1247 | static const struct ds_configuration ds_cfg_core_i7 = { |
926 | .sizeof_rec[ds_pebs] = 8 * 18, | 1248 | .name = "Core i7", |
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
1252 | .nr_counter_reset = 4, | ||
927 | }; | 1253 | }; |
928 | 1254 | ||
929 | static void | 1255 | static void |
930 | ds_configure(const struct ds_configuration *cfg) | 1256 | ds_configure(const struct ds_configuration *cfg, |
1257 | struct cpuinfo_x86 *cpu) | ||
931 | { | 1258 | { |
1259 | unsigned long nr_pebs_fields = 0; | ||
1260 | |||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
1262 | |||
1263 | #ifdef __i386__ | ||
1264 | nr_pebs_fields = 10; | ||
1265 | #else | ||
1266 | nr_pebs_fields = 18; | ||
1267 | #endif | ||
1268 | |||
1269 | /* | ||
1270 | * Starting with version 2, architectural performance | ||
1271 | * monitoring supports a format specifier. | ||
1272 | */ | ||
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
1274 | unsigned long perf_capabilities, format; | ||
1275 | |||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
1277 | |||
1278 | format = (perf_capabilities >> 8) & 0xf; | ||
1279 | |||
1280 | switch (format) { | ||
1281 | case 0: | ||
1282 | nr_pebs_fields = 18; | ||
1283 | break; | ||
1284 | case 1: | ||
1285 | nr_pebs_fields = 22; | ||
1286 | break; | ||
1287 | default: | ||
1288 | printk(KERN_INFO | ||
1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
1290 | nr_pebs_fields = 0; | ||
1291 | break; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
933 | ds_cfg = *cfg; | 1296 | ds_cfg = *cfg; |
934 | 1297 | ||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | 1298 | ds_cfg.sizeof_ptr_field = |
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
1300 | |||
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
936 | 1303 | ||
937 | if (!cpu_has_bts) { | 1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { |
938 | ds_cfg.ctl[dsf_bts] = 0; | 1305 | ds_cfg.sizeof_rec[ds_bts] = 0; |
939 | printk(KERN_INFO "[ds] bts not available\n"); | 1306 | printk(KERN_INFO "[ds] bts not available\n"); |
940 | } | 1307 | } |
941 | if (!cpu_has_pebs) | 1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { |
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | 1310 | printk(KERN_INFO "[ds] pebs not available\n"); |
1311 | } | ||
1312 | |||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
943 | 1317 | ||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); |
945 | } | 1319 | } |
946 | 1320 | ||
947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
948 | { | 1322 | { |
1323 | /* Only configure the first cpu. Others are identical. */ | ||
1324 | if (ds_cfg.name) | ||
1325 | return; | ||
1326 | |||
949 | switch (c->x86) { | 1327 | switch (c->x86) { |
950 | case 0x6: | 1328 | case 0x6: |
951 | switch (c->x86_model) { | 1329 | switch (c->x86_model) { |
952 | case 0x9: | 1330 | case 0x9: |
953 | case 0xd: /* Pentium M */ | 1331 | case 0xd: /* Pentium M */ |
954 | ds_configure(&ds_cfg_pentium_m); | 1332 | ds_configure(&ds_cfg_pentium_m, c); |
955 | break; | 1333 | break; |
956 | case 0xf: | 1334 | case 0xf: |
957 | case 0x17: /* Core2 */ | 1335 | case 0x17: /* Core2 */ |
958 | case 0x1c: /* Atom */ | 1336 | case 0x1c: /* Atom */ |
959 | ds_configure(&ds_cfg_core2_atom); | 1337 | ds_configure(&ds_cfg_core2_atom, c); |
1338 | break; | ||
1339 | case 0x1a: /* Core i7 */ | ||
1340 | ds_configure(&ds_cfg_core_i7, c); | ||
960 | break; | 1341 | break; |
961 | case 0x1a: /* i7 */ | ||
962 | default: | 1342 | default: |
963 | /* sorry, don't know about them */ | 1343 | /* Sorry, don't know about them. */ |
964 | break; | 1344 | break; |
965 | } | 1345 | } |
966 | break; | 1346 | break; |
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
969 | case 0x0: | 1349 | case 0x0: |
970 | case 0x1: | 1350 | case 0x1: |
971 | case 0x2: /* Netburst */ | 1351 | case 0x2: /* Netburst */ |
972 | ds_configure(&ds_cfg_netburst); | 1352 | ds_configure(&ds_cfg_netburst, c); |
973 | break; | 1353 | break; |
974 | default: | 1354 | default: |
975 | /* sorry, don't know about them */ | 1355 | /* Sorry, don't know about them. */ |
976 | break; | 1356 | break; |
977 | } | 1357 | } |
978 | break; | 1358 | break; |
979 | default: | 1359 | default: |
980 | /* sorry, don't know about them */ | 1360 | /* Sorry, don't know about them. */ |
981 | break; | 1361 | break; |
982 | } | 1362 | } |
983 | } | 1363 | } |
984 | 1364 | ||
1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
1366 | enum bts_qualifier qualifier, | ||
1367 | struct task_struct *task) | ||
1368 | { | ||
1369 | struct bts_tracer *tracer = context->bts_master; | ||
1370 | struct bts_struct ts; | ||
1371 | |||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
1373 | barrier(); | ||
1374 | |||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
1376 | return; | ||
1377 | |||
1378 | memset(&ts, 0, sizeof(ts)); | ||
1379 | ts.qualifier = qualifier; | ||
1380 | ts.variant.event.clock = trace_clock_global(); | ||
1381 | ts.variant.event.pid = task->pid; | ||
1382 | |||
1383 | bts_write(tracer, &ts); | ||
1384 | } | ||
1385 | |||
985 | /* | 1386 | /* |
986 | * Change the DS configuration from tracing prev to tracing next. | 1387 | * Change the DS configuration from tracing prev to tracing next. |
987 | */ | 1388 | */ |
988 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | 1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) |
989 | { | 1390 | { |
990 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | 1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
991 | struct ds_context *next_ctx = next->thread.ds_ctx; | 1392 | struct ds_context *next_ctx = next->thread.ds_ctx; |
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
1394 | |||
1395 | /* Make sure all data is read before we start. */ | ||
1396 | barrier(); | ||
992 | 1397 | ||
993 | if (prev_ctx) { | 1398 | if (prev_ctx) { |
994 | update_debugctlmsr(0); | 1399 | update_debugctlmsr(0); |
995 | 1400 | ||
996 | if (prev_ctx->bts_master && | 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); |
997 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
998 | struct bts_struct ts = { | ||
999 | .qualifier = bts_task_departs, | ||
1000 | .variant.timestamp.jiffies = jiffies_64, | ||
1001 | .variant.timestamp.pid = prev->pid | ||
1002 | }; | ||
1003 | bts_write(prev_ctx->bts_master, &ts); | ||
1004 | } | ||
1005 | } | 1402 | } |
1006 | 1403 | ||
1007 | if (next_ctx) { | 1404 | if (next_ctx) { |
1008 | if (next_ctx->bts_master && | 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); |
1009 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1010 | struct bts_struct ts = { | ||
1011 | .qualifier = bts_task_arrives, | ||
1012 | .variant.timestamp.jiffies = jiffies_64, | ||
1013 | .variant.timestamp.pid = next->pid | ||
1014 | }; | ||
1015 | bts_write(next_ctx->bts_master, &ts); | ||
1016 | } | ||
1017 | 1406 | ||
1018 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | 1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); |
1019 | } | 1408 | } |
1020 | 1409 | ||
1021 | update_debugctlmsr(next->thread.debugctlmsr); | 1410 | update_debugctlmsr(debugctlmsr); |
1022 | } | 1411 | } |
1023 | 1412 | ||
1024 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | 1413 | static __init int ds_selftest(void) |
1025 | { | 1414 | { |
1026 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | 1415 | if (ds_cfg.sizeof_rec[ds_bts]) { |
1027 | tsk->thread.ds_ctx = NULL; | 1416 | int error; |
1028 | } | ||
1029 | 1417 | ||
1030 | void ds_exit_thread(struct task_struct *tsk) | 1418 | error = ds_selftest_bts(); |
1031 | { | 1419 | if (error) { |
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
1426 | int error; | ||
1427 | |||
1428 | error = ds_selftest_pebs(); | ||
1429 | if (error) { | ||
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | return 0; | ||
1032 | } | 1436 | } |
1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c new file mode 100644 index 000000000000..6bc7c199ab99 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #include "ds_selftest.h" | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpu.h> | ||
15 | |||
16 | #include <asm/ds.h> | ||
17 | |||
18 | |||
19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
21 | |||
22 | struct ds_selftest_bts_conf { | ||
23 | struct bts_tracer *tracer; | ||
24 | int error; | ||
25 | int (*suspend)(struct bts_tracer *); | ||
26 | int (*resume)(struct bts_tracer *); | ||
27 | }; | ||
28 | |||
29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
30 | { | ||
31 | int error = 0; | ||
32 | |||
33 | if (!trace) { | ||
34 | printk(KERN_CONT "failed to access trace..."); | ||
35 | /* Bail out. Other tests are pointless. */ | ||
36 | return -1; | ||
37 | } | ||
38 | |||
39 | if (!trace->read) { | ||
40 | printk(KERN_CONT "bts read not available..."); | ||
41 | error = -1; | ||
42 | } | ||
43 | |||
44 | /* Do some sanity checks on the trace configuration. */ | ||
45 | if (!trace->ds.n) { | ||
46 | printk(KERN_CONT "empty bts buffer..."); | ||
47 | error = -1; | ||
48 | } | ||
49 | if (!trace->ds.size) { | ||
50 | printk(KERN_CONT "bad bts trace setup..."); | ||
51 | error = -1; | ||
52 | } | ||
53 | if (trace->ds.end != | ||
54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
56 | error = -1; | ||
57 | } | ||
58 | /* | ||
59 | * We allow top in [begin; end], since it's not clear when the | ||
60 | * overflow adjustment happens: after the increment or before the | ||
61 | * write. | ||
62 | */ | ||
63 | if ((trace->ds.top < trace->ds.begin) || | ||
64 | (trace->ds.end < trace->ds.top)) { | ||
65 | printk(KERN_CONT "bts top out of bounds..."); | ||
66 | error = -1; | ||
67 | } | ||
68 | |||
69 | return error; | ||
70 | } | ||
71 | |||
72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
73 | const struct bts_trace *trace, | ||
74 | const void *from, const void *to) | ||
75 | { | ||
76 | const unsigned char *at; | ||
77 | |||
78 | /* | ||
79 | * Check a few things which do not belong to this test. | ||
80 | * They should be covered by other tests. | ||
81 | */ | ||
82 | if (!trace) | ||
83 | return -1; | ||
84 | |||
85 | if (!trace->read) | ||
86 | return -1; | ||
87 | |||
88 | if (to < from) | ||
89 | return -1; | ||
90 | |||
91 | if (from < trace->ds.begin) | ||
92 | return -1; | ||
93 | |||
94 | if (trace->ds.end < to) | ||
95 | return -1; | ||
96 | |||
97 | if (!trace->ds.size) | ||
98 | return -1; | ||
99 | |||
100 | /* Now to the test itself. */ | ||
101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
102 | struct bts_struct bts; | ||
103 | unsigned long index; | ||
104 | int error; | ||
105 | |||
106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
107 | printk(KERN_CONT | ||
108 | "read from non-integer index..."); | ||
109 | return -1; | ||
110 | } | ||
111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
112 | |||
113 | memset(&bts, 0, sizeof(bts)); | ||
114 | error = trace->read(tracer, at, &bts); | ||
115 | if (error < 0) { | ||
116 | printk(KERN_CONT | ||
117 | "error reading bts trace at [%lu] (0x%p)...", | ||
118 | index, at); | ||
119 | return error; | ||
120 | } | ||
121 | |||
122 | switch (bts.qualifier) { | ||
123 | case BTS_BRANCH: | ||
124 | break; | ||
125 | default: | ||
126 | printk(KERN_CONT | ||
127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
128 | bts.qualifier, index, at); | ||
129 | return -1; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static void ds_selftest_bts_cpu(void *arg) | ||
137 | { | ||
138 | struct ds_selftest_bts_conf *conf = arg; | ||
139 | const struct bts_trace *trace; | ||
140 | void *top; | ||
141 | |||
142 | if (IS_ERR(conf->tracer)) { | ||
143 | conf->error = PTR_ERR(conf->tracer); | ||
144 | conf->tracer = NULL; | ||
145 | |||
146 | printk(KERN_CONT | ||
147 | "initialization failed (err: %d)...", conf->error); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | /* We should meanwhile have enough trace. */ | ||
152 | conf->error = conf->suspend(conf->tracer); | ||
153 | if (conf->error < 0) | ||
154 | return; | ||
155 | |||
156 | /* Let's see if we can access the trace. */ | ||
157 | trace = ds_read_bts(conf->tracer); | ||
158 | |||
159 | conf->error = ds_selftest_bts_consistency(trace); | ||
160 | if (conf->error < 0) | ||
161 | return; | ||
162 | |||
163 | /* If everything went well, we should have a few trace entries. */ | ||
164 | if (trace->ds.top == trace->ds.begin) { | ||
165 | /* | ||
166 | * It is possible but highly unlikely that we got a | ||
167 | * buffer overflow and end up at exactly the same | ||
168 | * position we started from. | ||
169 | * Let's issue a warning, but continue. | ||
170 | */ | ||
171 | printk(KERN_CONT "no trace/overflow..."); | ||
172 | } | ||
173 | |||
174 | /* Let's try to read the trace we collected. */ | ||
175 | conf->error = | ||
176 | ds_selftest_bts_read(conf->tracer, trace, | ||
177 | trace->ds.begin, trace->ds.top); | ||
178 | if (conf->error < 0) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Let's read the trace again. | ||
183 | * Since we suspended tracing, we should get the same result. | ||
184 | */ | ||
185 | top = trace->ds.top; | ||
186 | |||
187 | trace = ds_read_bts(conf->tracer); | ||
188 | conf->error = ds_selftest_bts_consistency(trace); | ||
189 | if (conf->error < 0) | ||
190 | return; | ||
191 | |||
192 | if (top != trace->ds.top) { | ||
193 | printk(KERN_CONT "suspend not working..."); | ||
194 | conf->error = -1; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* Let's collect some more trace - see if resume is working. */ | ||
199 | conf->error = conf->resume(conf->tracer); | ||
200 | if (conf->error < 0) | ||
201 | return; | ||
202 | |||
203 | conf->error = conf->suspend(conf->tracer); | ||
204 | if (conf->error < 0) | ||
205 | return; | ||
206 | |||
207 | trace = ds_read_bts(conf->tracer); | ||
208 | |||
209 | conf->error = ds_selftest_bts_consistency(trace); | ||
210 | if (conf->error < 0) | ||
211 | return; | ||
212 | |||
213 | if (trace->ds.top == top) { | ||
214 | /* | ||
215 | * It is possible but highly unlikely that we got a | ||
216 | * buffer overflow and end up at exactly the same | ||
217 | * position we started from. | ||
218 | * Let's issue a warning and check the full trace. | ||
219 | */ | ||
220 | printk(KERN_CONT | ||
221 | "no resume progress/overflow..."); | ||
222 | |||
223 | conf->error = | ||
224 | ds_selftest_bts_read(conf->tracer, trace, | ||
225 | trace->ds.begin, trace->ds.end); | ||
226 | } else if (trace->ds.top < top) { | ||
227 | /* | ||
228 | * We had a buffer overflow - the entire buffer should | ||
229 | * contain trace records. | ||
230 | */ | ||
231 | conf->error = | ||
232 | ds_selftest_bts_read(conf->tracer, trace, | ||
233 | trace->ds.begin, trace->ds.end); | ||
234 | } else { | ||
235 | /* | ||
236 | * It is quite likely that the buffer did not overflow. | ||
237 | * Let's just check the delta trace. | ||
238 | */ | ||
239 | conf->error = | ||
240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
241 | trace->ds.top); | ||
242 | } | ||
243 | if (conf->error < 0) | ||
244 | return; | ||
245 | |||
246 | conf->error = 0; | ||
247 | } | ||
248 | |||
249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
250 | { | ||
251 | ds_suspend_bts(tracer); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
256 | { | ||
257 | ds_resume_bts(tracer); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
262 | { | ||
263 | (void)ds_release_bts_noirq(tracer); | ||
264 | } | ||
265 | |||
266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
267 | struct bts_tracer *tracer) | ||
268 | { | ||
269 | int error = -EPERM; | ||
270 | |||
271 | /* Try to release the tracer on the wrong cpu. */ | ||
272 | get_cpu(); | ||
273 | if (cpu != smp_processor_id()) { | ||
274 | error = ds_release_bts_noirq(tracer); | ||
275 | if (error != -EPERM) | ||
276 | printk(KERN_CONT "release on wrong cpu..."); | ||
277 | } | ||
278 | put_cpu(); | ||
279 | |||
280 | return error ? 0 : -1; | ||
281 | } | ||
282 | |||
283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
284 | { | ||
285 | struct bts_tracer *tracer; | ||
286 | int error; | ||
287 | |||
288 | /* Try to request cpu tracing while task tracing is active. */ | ||
289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
290 | (size_t)-1, BTS_KERNEL); | ||
291 | error = PTR_ERR(tracer); | ||
292 | if (!IS_ERR(tracer)) { | ||
293 | ds_release_bts(tracer); | ||
294 | error = 0; | ||
295 | } | ||
296 | |||
297 | if (error != -EPERM) | ||
298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
299 | |||
300 | return error ? 0 : -1; | ||
301 | } | ||
302 | |||
303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
304 | { | ||
305 | struct bts_tracer *tracer; | ||
306 | int error; | ||
307 | |||
308 | /* Try to request task tracing while cpu tracing is active. */ | ||
309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
310 | (size_t)-1, BTS_KERNEL); | ||
311 | error = PTR_ERR(tracer); | ||
312 | if (!IS_ERR(tracer)) { | ||
313 | error = 0; | ||
314 | ds_release_bts(tracer); | ||
315 | } | ||
316 | |||
317 | if (error != -EPERM) | ||
318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
319 | |||
320 | return error ? 0 : -1; | ||
321 | } | ||
322 | |||
323 | int ds_selftest_bts(void) | ||
324 | { | ||
325 | struct ds_selftest_bts_conf conf; | ||
326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
327 | unsigned long irq; | ||
328 | int cpu; | ||
329 | |||
330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
331 | conf.error = 0; | ||
332 | |||
333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
334 | |||
335 | get_online_cpus(); | ||
336 | for_each_online_cpu(cpu) { | ||
337 | conf.suspend = ds_suspend_bts_wrap; | ||
338 | conf.resume = ds_resume_bts_wrap; | ||
339 | conf.tracer = | ||
340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
341 | NULL, (size_t)-1, BTS_KERNEL); | ||
342 | ds_selftest_bts_cpu(&conf); | ||
343 | if (conf.error >= 0) | ||
344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
345 | ds_release_bts(conf.tracer); | ||
346 | if (conf.error < 0) | ||
347 | goto out; | ||
348 | |||
349 | conf.suspend = ds_suspend_bts_noirq; | ||
350 | conf.resume = ds_resume_bts_noirq; | ||
351 | conf.tracer = | ||
352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
353 | NULL, (size_t)-1, BTS_KERNEL); | ||
354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
355 | if (conf.error >= 0) { | ||
356 | conf.error = | ||
357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
358 | conf.tracer); | ||
359 | /* We must not release the tracer twice. */ | ||
360 | if (conf.error < 0) | ||
361 | conf.tracer = NULL; | ||
362 | } | ||
363 | if (conf.error >= 0) | ||
364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
366 | conf.tracer, 1); | ||
367 | if (conf.error < 0) | ||
368 | goto out; | ||
369 | } | ||
370 | |||
371 | conf.suspend = ds_suspend_bts_wrap; | ||
372 | conf.resume = ds_resume_bts_wrap; | ||
373 | conf.tracer = | ||
374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
375 | NULL, (size_t)-1, BTS_KERNEL); | ||
376 | ds_selftest_bts_cpu(&conf); | ||
377 | if (conf.error >= 0) | ||
378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
379 | ds_release_bts(conf.tracer); | ||
380 | if (conf.error < 0) | ||
381 | goto out; | ||
382 | |||
383 | conf.suspend = ds_suspend_bts_noirq; | ||
384 | conf.resume = ds_resume_bts_noirq; | ||
385 | conf.tracer = | ||
386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
387 | NULL, (size_t)-1, BTS_KERNEL); | ||
388 | local_irq_save(irq); | ||
389 | ds_selftest_bts_cpu(&conf); | ||
390 | if (conf.error >= 0) | ||
391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
392 | ds_release_bts_noirq(conf.tracer); | ||
393 | local_irq_restore(irq); | ||
394 | if (conf.error < 0) | ||
395 | goto out; | ||
396 | |||
397 | conf.error = 0; | ||
398 | out: | ||
399 | put_online_cpus(); | ||
400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
401 | |||
402 | return conf.error; | ||
403 | } | ||
404 | |||
405 | int ds_selftest_pebs(void) | ||
406 | { | ||
407 | return 0; | ||
408 | } | ||
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h new file mode 100644 index 000000000000..2ba8745c6663 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.h | |||
@@ -0,0 +1,15 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
10 | extern int ds_selftest_bts(void); | ||
11 | extern int ds_selftest_pebs(void); | ||
12 | #else | ||
13 | static inline int ds_selftest_bts(void) { return 0; } | ||
14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
15 | #endif | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bb01ce080b80..1c17d7c751a4 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -147,27 +147,14 @@ END(ftrace_graph_caller) | |||
147 | GLOBAL(return_to_handler) | 147 | GLOBAL(return_to_handler) |
148 | subq $80, %rsp | 148 | subq $80, %rsp |
149 | 149 | ||
150 | /* Save the return values */ | ||
150 | movq %rax, (%rsp) | 151 | movq %rax, (%rsp) |
151 | movq %rcx, 8(%rsp) | 152 | movq %rdx, 8(%rsp) |
152 | movq %rdx, 16(%rsp) | ||
153 | movq %rsi, 24(%rsp) | ||
154 | movq %rdi, 32(%rsp) | ||
155 | movq %r8, 40(%rsp) | ||
156 | movq %r9, 48(%rsp) | ||
157 | movq %r10, 56(%rsp) | ||
158 | movq %r11, 64(%rsp) | ||
159 | 153 | ||
160 | call ftrace_return_to_handler | 154 | call ftrace_return_to_handler |
161 | 155 | ||
162 | movq %rax, 72(%rsp) | 156 | movq %rax, 72(%rsp) |
163 | movq 64(%rsp), %r11 | 157 | movq 8(%rsp), %rdx |
164 | movq 56(%rsp), %r10 | ||
165 | movq 48(%rsp), %r9 | ||
166 | movq 40(%rsp), %r8 | ||
167 | movq 32(%rsp), %rdi | ||
168 | movq 24(%rsp), %rsi | ||
169 | movq 16(%rsp), %rdx | ||
170 | movq 8(%rsp), %rcx | ||
171 | movq (%rsp), %rax | 158 | movq (%rsp), %rax |
172 | addq $72, %rsp | 159 | addq $72, %rsp |
173 | retq | 160 | retq |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e22d63bdc8ff..3bb2be1649bd 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/idle.h> | 16 | #include <asm/idle.h> |
17 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 18 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | ||
19 | 20 | ||
20 | unsigned long idle_halt; | 21 | unsigned long idle_halt; |
21 | EXPORT_SYMBOL(idle_halt); | 22 | EXPORT_SYMBOL(idle_halt); |
@@ -47,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) | |||
47 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | 48 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); |
48 | tsk->thread.xstate = NULL; | 49 | tsk->thread.xstate = NULL; |
49 | } | 50 | } |
51 | |||
52 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
50 | } | 53 | } |
51 | 54 | ||
52 | void free_thread_info(struct thread_info *ti) | 55 | void free_thread_info(struct thread_info *ti) |
@@ -85,8 +88,6 @@ void exit_thread(void) | |||
85 | put_cpu(); | 88 | put_cpu(); |
86 | kfree(bp); | 89 | kfree(bp); |
87 | } | 90 | } |
88 | |||
89 | ds_exit_thread(current); | ||
90 | } | 91 | } |
91 | 92 | ||
92 | void flush_thread(void) | 93 | void flush_thread(void) |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index c60924b5d123..59f4524984af 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -287,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
287 | p->thread.io_bitmap_max = 0; | 287 | p->thread.io_bitmap_max = 0; |
288 | } | 288 | } |
289 | 289 | ||
290 | ds_copy_thread(p, current); | 290 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
291 | p->thread.ds_ctx = NULL; | ||
291 | 292 | ||
292 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 293 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
293 | p->thread.debugctlmsr = 0; | 294 | p->thread.debugctlmsr = 0; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 45f010fb2e20..ebefb5407b9d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -332,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
332 | goto out; | 332 | goto out; |
333 | } | 333 | } |
334 | 334 | ||
335 | ds_copy_thread(p, me); | 335 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
336 | p->thread.ds_ctx = NULL; | ||
336 | 337 | ||
337 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 338 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
338 | p->thread.debugctlmsr = 0; | 339 | p->thread.debugctlmsr = 0; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 23b7c8f017e2..09ecbde91c13 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target, | |||
578 | } | 579 | } |
579 | 580 | ||
580 | #ifdef CONFIG_X86_PTRACE_BTS | 581 | #ifdef CONFIG_X86_PTRACE_BTS |
582 | /* | ||
583 | * A branch trace store context. | ||
584 | * | ||
585 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
586 | * ptraced tasks. | ||
587 | * | ||
588 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
589 | * The actual destruction work requires interrupts enabled, so the | ||
590 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
591 | * | ||
592 | * Contexts hold an additional task_struct reference on the traced | ||
593 | * task, as well as a reference on the tracer's mm. | ||
594 | * | ||
595 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
596 | * but since destruction is deferred, it may be executed after both | ||
597 | * tracer and tracee exited. | ||
598 | */ | ||
599 | struct bts_context { | ||
600 | /* The branch trace handle. */ | ||
601 | struct bts_tracer *tracer; | ||
602 | |||
603 | /* The buffer used to store the branch trace and its size. */ | ||
604 | void *buffer; | ||
605 | unsigned int size; | ||
606 | |||
607 | /* The mm that paid for the above buffer. */ | ||
608 | struct mm_struct *mm; | ||
609 | |||
610 | /* The task this context belongs to. */ | ||
611 | struct task_struct *task; | ||
612 | |||
613 | /* The signal to send on a bts buffer overflow. */ | ||
614 | unsigned int bts_ovfl_signal; | ||
615 | |||
616 | /* The work struct to destroy a context. */ | ||
617 | struct work_struct work; | ||
618 | }; | ||
619 | |||
620 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
621 | { | ||
622 | void *buffer = NULL; | ||
623 | int err = -ENOMEM; | ||
624 | |||
625 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
626 | if (err < 0) | ||
627 | return err; | ||
628 | |||
629 | buffer = kzalloc(size, GFP_KERNEL); | ||
630 | if (!buffer) | ||
631 | goto out_refund; | ||
632 | |||
633 | context->buffer = buffer; | ||
634 | context->size = size; | ||
635 | context->mm = get_task_mm(current); | ||
636 | |||
637 | return 0; | ||
638 | |||
639 | out_refund: | ||
640 | refund_locked_memory(current->mm, size); | ||
641 | return err; | ||
642 | } | ||
643 | |||
644 | static inline void free_bts_buffer(struct bts_context *context) | ||
645 | { | ||
646 | if (!context->buffer) | ||
647 | return; | ||
648 | |||
649 | kfree(context->buffer); | ||
650 | context->buffer = NULL; | ||
651 | |||
652 | refund_locked_memory(context->mm, context->size); | ||
653 | context->size = 0; | ||
654 | |||
655 | mmput(context->mm); | ||
656 | context->mm = NULL; | ||
657 | } | ||
658 | |||
659 | static void free_bts_context_work(struct work_struct *w) | ||
660 | { | ||
661 | struct bts_context *context; | ||
662 | |||
663 | context = container_of(w, struct bts_context, work); | ||
664 | |||
665 | ds_release_bts(context->tracer); | ||
666 | put_task_struct(context->task); | ||
667 | free_bts_buffer(context); | ||
668 | kfree(context); | ||
669 | } | ||
670 | |||
671 | static inline void free_bts_context(struct bts_context *context) | ||
672 | { | ||
673 | INIT_WORK(&context->work, free_bts_context_work); | ||
674 | schedule_work(&context->work); | ||
675 | } | ||
676 | |||
677 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
678 | { | ||
679 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
680 | if (context) { | ||
681 | context->task = task; | ||
682 | task->bts = context; | ||
683 | |||
684 | get_task_struct(task); | ||
685 | } | ||
686 | |||
687 | return context; | ||
688 | } | ||
689 | |||
581 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 690 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
582 | struct bts_struct __user *out) | 691 | struct bts_struct __user *out) |
583 | { | 692 | { |
693 | struct bts_context *context; | ||
584 | const struct bts_trace *trace; | 694 | const struct bts_trace *trace; |
585 | struct bts_struct bts; | 695 | struct bts_struct bts; |
586 | const unsigned char *at; | 696 | const unsigned char *at; |
587 | int error; | 697 | int error; |
588 | 698 | ||
589 | trace = ds_read_bts(child->bts); | 699 | context = child->bts; |
700 | if (!context) | ||
701 | return -ESRCH; | ||
702 | |||
703 | trace = ds_read_bts(context->tracer); | ||
590 | if (!trace) | 704 | if (!trace) |
591 | return -EPERM; | 705 | return -ESRCH; |
592 | 706 | ||
593 | at = trace->ds.top - ((index + 1) * trace->ds.size); | 707 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
594 | if ((void *)at < trace->ds.begin) | 708 | if ((void *)at < trace->ds.begin) |
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, | |||
597 | if (!trace->read) | 711 | if (!trace->read) |
598 | return -EOPNOTSUPP; | 712 | return -EOPNOTSUPP; |
599 | 713 | ||
600 | error = trace->read(child->bts, at, &bts); | 714 | error = trace->read(context->tracer, at, &bts); |
601 | if (error < 0) | 715 | if (error < 0) |
602 | return error; | 716 | return error; |
603 | 717 | ||
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
611 | long size, | 725 | long size, |
612 | struct bts_struct __user *out) | 726 | struct bts_struct __user *out) |
613 | { | 727 | { |
728 | struct bts_context *context; | ||
614 | const struct bts_trace *trace; | 729 | const struct bts_trace *trace; |
615 | const unsigned char *at; | 730 | const unsigned char *at; |
616 | int error, drained = 0; | 731 | int error, drained = 0; |
617 | 732 | ||
618 | trace = ds_read_bts(child->bts); | 733 | context = child->bts; |
734 | if (!context) | ||
735 | return -ESRCH; | ||
736 | |||
737 | trace = ds_read_bts(context->tracer); | ||
619 | if (!trace) | 738 | if (!trace) |
620 | return -EPERM; | 739 | return -ESRCH; |
621 | 740 | ||
622 | if (!trace->read) | 741 | if (!trace->read) |
623 | return -EOPNOTSUPP; | 742 | return -EOPNOTSUPP; |
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
628 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | 747 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
629 | out++, drained++, at += trace->ds.size) { | 748 | out++, drained++, at += trace->ds.size) { |
630 | struct bts_struct bts; | 749 | struct bts_struct bts; |
631 | int error; | ||
632 | 750 | ||
633 | error = trace->read(child->bts, at, &bts); | 751 | error = trace->read(context->tracer, at, &bts); |
634 | if (error < 0) | 752 | if (error < 0) |
635 | return error; | 753 | return error; |
636 | 754 | ||
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
640 | 758 | ||
641 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 759 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
642 | 760 | ||
643 | error = ds_reset_bts(child->bts); | 761 | error = ds_reset_bts(context->tracer); |
644 | if (error < 0) | 762 | if (error < 0) |
645 | return error; | 763 | return error; |
646 | 764 | ||
647 | return drained; | 765 | return drained; |
648 | } | 766 | } |
649 | 767 | ||
650 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) | ||
651 | { | ||
652 | child->bts_buffer = alloc_locked_buffer(size); | ||
653 | if (!child->bts_buffer) | ||
654 | return -ENOMEM; | ||
655 | |||
656 | child->bts_size = size; | ||
657 | |||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | static void ptrace_bts_free_buffer(struct task_struct *child) | ||
662 | { | ||
663 | free_locked_buffer(child->bts_buffer, child->bts_size); | ||
664 | child->bts_buffer = NULL; | ||
665 | child->bts_size = 0; | ||
666 | } | ||
667 | |||
668 | static int ptrace_bts_config(struct task_struct *child, | 768 | static int ptrace_bts_config(struct task_struct *child, |
669 | long cfg_size, | 769 | long cfg_size, |
670 | const struct ptrace_bts_config __user *ucfg) | 770 | const struct ptrace_bts_config __user *ucfg) |
671 | { | 771 | { |
772 | struct bts_context *context; | ||
672 | struct ptrace_bts_config cfg; | 773 | struct ptrace_bts_config cfg; |
673 | unsigned int flags = 0; | 774 | unsigned int flags = 0; |
674 | 775 | ||
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child, | |||
678 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 779 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
679 | return -EFAULT; | 780 | return -EFAULT; |
680 | 781 | ||
681 | if (child->bts) { | 782 | context = child->bts; |
682 | ds_release_bts(child->bts); | 783 | if (!context) |
683 | child->bts = NULL; | 784 | context = alloc_bts_context(child); |
684 | } | 785 | if (!context) |
786 | return -ENOMEM; | ||
685 | 787 | ||
686 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 788 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
687 | if (!cfg.signal) | 789 | if (!cfg.signal) |
688 | return -EINVAL; | 790 | return -EINVAL; |
689 | 791 | ||
690 | child->thread.bts_ovfl_signal = cfg.signal; | ||
691 | return -EOPNOTSUPP; | 792 | return -EOPNOTSUPP; |
793 | context->bts_ovfl_signal = cfg.signal; | ||
692 | } | 794 | } |
693 | 795 | ||
694 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && | 796 | ds_release_bts(context->tracer); |
695 | (cfg.size != child->bts_size)) { | 797 | context->tracer = NULL; |
696 | int error; | ||
697 | 798 | ||
698 | ptrace_bts_free_buffer(child); | 799 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { |
800 | int err; | ||
699 | 801 | ||
700 | error = ptrace_bts_allocate_buffer(child, cfg.size); | 802 | free_bts_buffer(context); |
701 | if (error < 0) | 803 | if (!cfg.size) |
702 | return error; | 804 | return 0; |
805 | |||
806 | err = alloc_bts_buffer(context, cfg.size); | ||
807 | if (err < 0) | ||
808 | return err; | ||
703 | } | 809 | } |
704 | 810 | ||
705 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 811 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child, | |||
708 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 814 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
709 | flags |= BTS_TIMESTAMPS; | 815 | flags |= BTS_TIMESTAMPS; |
710 | 816 | ||
711 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, | 817 | context->tracer = |
712 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | 818 | ds_request_bts_task(child, context->buffer, context->size, |
713 | flags); | 819 | NULL, (size_t)-1, flags); |
714 | if (IS_ERR(child->bts)) { | 820 | if (unlikely(IS_ERR(context->tracer))) { |
715 | int error = PTR_ERR(child->bts); | 821 | int error = PTR_ERR(context->tracer); |
716 | |||
717 | ptrace_bts_free_buffer(child); | ||
718 | child->bts = NULL; | ||
719 | 822 | ||
823 | free_bts_buffer(context); | ||
824 | context->tracer = NULL; | ||
720 | return error; | 825 | return error; |
721 | } | 826 | } |
722 | 827 | ||
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child, | |||
727 | long cfg_size, | 832 | long cfg_size, |
728 | struct ptrace_bts_config __user *ucfg) | 833 | struct ptrace_bts_config __user *ucfg) |
729 | { | 834 | { |
835 | struct bts_context *context; | ||
730 | const struct bts_trace *trace; | 836 | const struct bts_trace *trace; |
731 | struct ptrace_bts_config cfg; | 837 | struct ptrace_bts_config cfg; |
732 | 838 | ||
839 | context = child->bts; | ||
840 | if (!context) | ||
841 | return -ESRCH; | ||
842 | |||
733 | if (cfg_size < sizeof(cfg)) | 843 | if (cfg_size < sizeof(cfg)) |
734 | return -EIO; | 844 | return -EIO; |
735 | 845 | ||
736 | trace = ds_read_bts(child->bts); | 846 | trace = ds_read_bts(context->tracer); |
737 | if (!trace) | 847 | if (!trace) |
738 | return -EPERM; | 848 | return -ESRCH; |
739 | 849 | ||
740 | memset(&cfg, 0, sizeof(cfg)); | 850 | memset(&cfg, 0, sizeof(cfg)); |
741 | cfg.size = trace->ds.end - trace->ds.begin; | 851 | cfg.size = trace->ds.end - trace->ds.begin; |
742 | cfg.signal = child->thread.bts_ovfl_signal; | 852 | cfg.signal = context->bts_ovfl_signal; |
743 | cfg.bts_size = sizeof(struct bts_struct); | 853 | cfg.bts_size = sizeof(struct bts_struct); |
744 | 854 | ||
745 | if (cfg.signal) | 855 | if (cfg.signal) |
746 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 856 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child, | |||
759 | 869 | ||
760 | static int ptrace_bts_clear(struct task_struct *child) | 870 | static int ptrace_bts_clear(struct task_struct *child) |
761 | { | 871 | { |
872 | struct bts_context *context; | ||
762 | const struct bts_trace *trace; | 873 | const struct bts_trace *trace; |
763 | 874 | ||
764 | trace = ds_read_bts(child->bts); | 875 | context = child->bts; |
876 | if (!context) | ||
877 | return -ESRCH; | ||
878 | |||
879 | trace = ds_read_bts(context->tracer); | ||
765 | if (!trace) | 880 | if (!trace) |
766 | return -EPERM; | 881 | return -ESRCH; |
767 | 882 | ||
768 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 883 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
769 | 884 | ||
770 | return ds_reset_bts(child->bts); | 885 | return ds_reset_bts(context->tracer); |
771 | } | 886 | } |
772 | 887 | ||
773 | static int ptrace_bts_size(struct task_struct *child) | 888 | static int ptrace_bts_size(struct task_struct *child) |
774 | { | 889 | { |
890 | struct bts_context *context; | ||
775 | const struct bts_trace *trace; | 891 | const struct bts_trace *trace; |
776 | 892 | ||
777 | trace = ds_read_bts(child->bts); | 893 | context = child->bts; |
894 | if (!context) | ||
895 | return -ESRCH; | ||
896 | |||
897 | trace = ds_read_bts(context->tracer); | ||
778 | if (!trace) | 898 | if (!trace) |
779 | return -EPERM; | 899 | return -ESRCH; |
780 | 900 | ||
781 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | 901 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
782 | } | 902 | } |
783 | 903 | ||
784 | static void ptrace_bts_fork(struct task_struct *tsk) | 904 | /* |
785 | { | 905 | * Called from __ptrace_unlink() after the child has been moved back |
786 | tsk->bts = NULL; | 906 | * to its original parent. |
787 | tsk->bts_buffer = NULL; | 907 | */ |
788 | tsk->bts_size = 0; | 908 | void ptrace_bts_untrace(struct task_struct *child) |
789 | tsk->thread.bts_ovfl_signal = 0; | ||
790 | } | ||
791 | |||
792 | static void ptrace_bts_untrace(struct task_struct *child) | ||
793 | { | 909 | { |
794 | if (unlikely(child->bts)) { | 910 | if (unlikely(child->bts)) { |
795 | ds_release_bts(child->bts); | 911 | free_bts_context(child->bts); |
796 | child->bts = NULL; | 912 | child->bts = NULL; |
797 | |||
798 | /* We cannot update total_vm and locked_vm since | ||
799 | child's mm is already gone. But we can reclaim the | ||
800 | memory. */ | ||
801 | kfree(child->bts_buffer); | ||
802 | child->bts_buffer = NULL; | ||
803 | child->bts_size = 0; | ||
804 | } | 913 | } |
805 | } | 914 | } |
806 | |||
807 | static void ptrace_bts_detach(struct task_struct *child) | ||
808 | { | ||
809 | /* | ||
810 | * Ptrace_detach() races with ptrace_untrace() in case | ||
811 | * the child dies and is reaped by another thread. | ||
812 | * | ||
813 | * We only do the memory accounting at this point and | ||
814 | * leave the buffer deallocation and the bts tracer | ||
815 | * release to ptrace_bts_untrace() which will be called | ||
816 | * later on with tasklist_lock held. | ||
817 | */ | ||
818 | release_locked_buffer(child->bts_buffer, child->bts_size); | ||
819 | } | ||
820 | #else | ||
821 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} | ||
822 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
823 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
824 | #endif /* CONFIG_X86_PTRACE_BTS */ | 915 | #endif /* CONFIG_X86_PTRACE_BTS */ |
825 | 916 | ||
826 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) | ||
827 | { | ||
828 | ptrace_bts_fork(child); | ||
829 | } | ||
830 | |||
831 | void x86_ptrace_untrace(struct task_struct *child) | ||
832 | { | ||
833 | ptrace_bts_untrace(child); | ||
834 | } | ||
835 | |||
836 | /* | 917 | /* |
837 | * Called by kernel/ptrace.c when detaching.. | 918 | * Called by kernel/ptrace.c when detaching.. |
838 | * | 919 | * |
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child) | |||
844 | #ifdef TIF_SYSCALL_EMU | 925 | #ifdef TIF_SYSCALL_EMU |
845 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 926 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
846 | #endif | 927 | #endif |
847 | ptrace_bts_detach(child); | ||
848 | } | 928 | } |
849 | 929 | ||
850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 930 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index f7bddc2e37d1..4aaf7e48394f 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) | |||
20 | 20 | ||
21 | static int save_stack_stack(void *data, char *name) | 21 | static int save_stack_stack(void *data, char *name) |
22 | { | 22 | { |
23 | return -1; | 23 | return 0; |
24 | } | 24 | } |
25 | 25 | ||
26 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 26 | static void save_stack_address(void *data, unsigned long addr, int reliable) |
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 50dc802a1c46..16ccbd77917f 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -32,7 +32,7 @@ struct kmmio_fault_page { | |||
32 | struct list_head list; | 32 | struct list_head list; |
33 | struct kmmio_fault_page *release_next; | 33 | struct kmmio_fault_page *release_next; |
34 | unsigned long page; /* location of the fault page */ | 34 | unsigned long page; /* location of the fault page */ |
35 | bool old_presence; /* page presence prior to arming */ | 35 | pteval_t old_presence; /* page presence prior to arming */ |
36 | bool armed; | 36 | bool armed; |
37 | 37 | ||
38 | /* | 38 | /* |
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) | |||
97 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | 97 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) |
98 | { | 98 | { |
99 | struct list_head *head; | 99 | struct list_head *head; |
100 | struct kmmio_fault_page *p; | 100 | struct kmmio_fault_page *f; |
101 | 101 | ||
102 | page &= PAGE_MASK; | 102 | page &= PAGE_MASK; |
103 | head = kmmio_page_list(page); | 103 | head = kmmio_page_list(page); |
104 | list_for_each_entry_rcu(p, head, list) { | 104 | list_for_each_entry_rcu(f, head, list) { |
105 | if (p->page == page) | 105 | if (f->page == page) |
106 | return p; | 106 | return f; |
107 | } | 107 | } |
108 | return NULL; | 108 | return NULL; |
109 | } | 109 | } |
110 | 110 | ||
111 | static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) | 111 | static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) |
112 | { | 112 | { |
113 | pmdval_t v = pmd_val(*pmd); | 113 | pmdval_t v = pmd_val(*pmd); |
114 | *old = !!(v & _PAGE_PRESENT); | 114 | if (clear) { |
115 | v &= ~_PAGE_PRESENT; | 115 | *old = v & _PAGE_PRESENT; |
116 | if (present) | 116 | v &= ~_PAGE_PRESENT; |
117 | v |= _PAGE_PRESENT; | 117 | } else /* presume this has been called with clear==true previously */ |
118 | v |= *old; | ||
118 | set_pmd(pmd, __pmd(v)); | 119 | set_pmd(pmd, __pmd(v)); |
119 | } | 120 | } |
120 | 121 | ||
121 | static void set_pte_presence(pte_t *pte, bool present, bool *old) | 122 | static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) |
122 | { | 123 | { |
123 | pteval_t v = pte_val(*pte); | 124 | pteval_t v = pte_val(*pte); |
124 | *old = !!(v & _PAGE_PRESENT); | 125 | if (clear) { |
125 | v &= ~_PAGE_PRESENT; | 126 | *old = v & _PAGE_PRESENT; |
126 | if (present) | 127 | v &= ~_PAGE_PRESENT; |
127 | v |= _PAGE_PRESENT; | 128 | } else /* presume this has been called with clear==true previously */ |
129 | v |= *old; | ||
128 | set_pte_atomic(pte, __pte(v)); | 130 | set_pte_atomic(pte, __pte(v)); |
129 | } | 131 | } |
130 | 132 | ||
131 | static int set_page_presence(unsigned long addr, bool present, bool *old) | 133 | static int clear_page_presence(struct kmmio_fault_page *f, bool clear) |
132 | { | 134 | { |
133 | unsigned int level; | 135 | unsigned int level; |
134 | pte_t *pte = lookup_address(addr, &level); | 136 | pte_t *pte = lookup_address(f->page, &level); |
135 | 137 | ||
136 | if (!pte) { | 138 | if (!pte) { |
137 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | 139 | pr_err("kmmio: no pte for page 0x%08lx\n", f->page); |
138 | return -1; | 140 | return -1; |
139 | } | 141 | } |
140 | 142 | ||
141 | switch (level) { | 143 | switch (level) { |
142 | case PG_LEVEL_2M: | 144 | case PG_LEVEL_2M: |
143 | set_pmd_presence((pmd_t *)pte, present, old); | 145 | clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence); |
144 | break; | 146 | break; |
145 | case PG_LEVEL_4K: | 147 | case PG_LEVEL_4K: |
146 | set_pte_presence(pte, present, old); | 148 | clear_pte_presence(pte, clear, &f->old_presence); |
147 | break; | 149 | break; |
148 | default: | 150 | default: |
149 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 151 | pr_err("kmmio: unexpected page level 0x%x.\n", level); |
150 | return -1; | 152 | return -1; |
151 | } | 153 | } |
152 | 154 | ||
153 | __flush_tlb_one(addr); | 155 | __flush_tlb_one(f->page); |
154 | return 0; | 156 | return 0; |
155 | } | 157 | } |
156 | 158 | ||
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
171 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | 173 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); |
172 | if (f->armed) { | 174 | if (f->armed) { |
173 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | 175 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", |
174 | f->page, f->count, f->old_presence); | 176 | f->page, f->count, !!f->old_presence); |
175 | } | 177 | } |
176 | ret = set_page_presence(f->page, false, &f->old_presence); | 178 | ret = clear_page_presence(f, true); |
177 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | 179 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); |
178 | f->armed = true; | 180 | f->armed = true; |
179 | return ret; | 181 | return ret; |
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
182 | /** Restore the given page to saved presence state. */ | 184 | /** Restore the given page to saved presence state. */ |
183 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) | 185 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) |
184 | { | 186 | { |
185 | bool tmp; | 187 | int ret = clear_page_presence(f, false); |
186 | int ret = set_page_presence(f->page, f->old_presence, &tmp); | ||
187 | WARN_ONCE(ret < 0, | 188 | WARN_ONCE(ret < 0, |
188 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | 189 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); |
189 | f->armed = false; | 190 | f->armed = false; |
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 311 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
311 | 312 | ||
312 | if (!ctx->active) { | 313 | if (!ctx->active) { |
313 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | 314 | /* |
315 | * debug traps without an active context are due to either | ||
316 | * something external causing them (e.g. using a debugger while | ||
317 | * mmio tracing is enabled), or erroneous behaviour | ||
318 | */ | ||
319 | pr_warning("kmmio: unexpected debug trap on CPU %d.\n", | ||
314 | smp_processor_id()); | 320 | smp_processor_id()); |
315 | goto out; | 321 | goto out; |
316 | } | 322 | } |
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) | |||
439 | head, | 445 | head, |
440 | struct kmmio_delayed_release, | 446 | struct kmmio_delayed_release, |
441 | rcu); | 447 | rcu); |
442 | struct kmmio_fault_page *p = dr->release_list; | 448 | struct kmmio_fault_page *f = dr->release_list; |
443 | while (p) { | 449 | while (f) { |
444 | struct kmmio_fault_page *next = p->release_next; | 450 | struct kmmio_fault_page *next = f->release_next; |
445 | BUG_ON(p->count); | 451 | BUG_ON(f->count); |
446 | kfree(p); | 452 | kfree(f); |
447 | p = next; | 453 | f = next; |
448 | } | 454 | } |
449 | kfree(dr); | 455 | kfree(dr); |
450 | } | 456 | } |
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) | |||
453 | { | 459 | { |
454 | struct kmmio_delayed_release *dr = | 460 | struct kmmio_delayed_release *dr = |
455 | container_of(head, struct kmmio_delayed_release, rcu); | 461 | container_of(head, struct kmmio_delayed_release, rcu); |
456 | struct kmmio_fault_page *p = dr->release_list; | 462 | struct kmmio_fault_page *f = dr->release_list; |
457 | struct kmmio_fault_page **prevp = &dr->release_list; | 463 | struct kmmio_fault_page **prevp = &dr->release_list; |
458 | unsigned long flags; | 464 | unsigned long flags; |
459 | 465 | ||
460 | spin_lock_irqsave(&kmmio_lock, flags); | 466 | spin_lock_irqsave(&kmmio_lock, flags); |
461 | while (p) { | 467 | while (f) { |
462 | if (!p->count) { | 468 | if (!f->count) { |
463 | list_del_rcu(&p->list); | 469 | list_del_rcu(&f->list); |
464 | prevp = &p->release_next; | 470 | prevp = &f->release_next; |
465 | } else { | 471 | } else { |
466 | *prevp = p->release_next; | 472 | *prevp = f->release_next; |
467 | } | 473 | } |
468 | p = p->release_next; | 474 | f = f->release_next; |
469 | } | 475 | } |
470 | spin_unlock_irqrestore(&kmmio_lock, flags); | 476 | spin_unlock_irqrestore(&kmmio_lock, flags); |
471 | 477 | ||
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
528 | } | 534 | } |
529 | EXPORT_SYMBOL(unregister_kmmio_probe); | 535 | EXPORT_SYMBOL(unregister_kmmio_probe); |
530 | 536 | ||
531 | static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, | 537 | static int |
532 | void *args) | 538 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) |
533 | { | 539 | { |
534 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
535 | 541 | ||
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = { | |||
544 | .notifier_call = kmmio_die_notifier | 550 | .notifier_call = kmmio_die_notifier |
545 | }; | 551 | }; |
546 | 552 | ||
547 | static int __init init_kmmio(void) | 553 | int kmmio_init(void) |
548 | { | 554 | { |
549 | int i; | 555 | int i; |
556 | |||
550 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) | 557 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) |
551 | INIT_LIST_HEAD(&kmmio_page_table[i]); | 558 | INIT_LIST_HEAD(&kmmio_page_table[i]); |
559 | |||
552 | return register_die_notifier(&nb_die); | 560 | return register_die_notifier(&nb_die); |
553 | } | 561 | } |
554 | fs_initcall(init_kmmio); /* should be before device_initcall() */ | 562 | |
563 | void kmmio_cleanup(void) | ||
564 | { | ||
565 | int i; | ||
566 | |||
567 | unregister_die_notifier(&nb_die); | ||
568 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { | ||
569 | WARN_ONCE(!list_empty(&kmmio_page_table[i]), | ||
570 | KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); | ||
571 | } | ||
572 | } | ||
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index c9342ed8b402..132772a8ec57 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -451,6 +451,7 @@ void enable_mmiotrace(void) | |||
451 | 451 | ||
452 | if (nommiotrace) | 452 | if (nommiotrace) |
453 | pr_info(NAME "MMIO tracing disabled.\n"); | 453 | pr_info(NAME "MMIO tracing disabled.\n"); |
454 | kmmio_init(); | ||
454 | enter_uniprocessor(); | 455 | enter_uniprocessor(); |
455 | spin_lock_irq(&trace_lock); | 456 | spin_lock_irq(&trace_lock); |
456 | atomic_inc(&mmiotrace_enabled); | 457 | atomic_inc(&mmiotrace_enabled); |
@@ -473,6 +474,7 @@ void disable_mmiotrace(void) | |||
473 | 474 | ||
474 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ | 475 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ |
475 | leave_uniprocessor(); | 476 | leave_uniprocessor(); |
477 | kmmio_cleanup(); | ||
476 | pr_info(NAME "disabled.\n"); | 478 | pr_info(NAME "disabled.\n"); |
477 | out: | 479 | out: |
478 | mutex_unlock(&mmiotrace_mutex); | 480 | mutex_unlock(&mmiotrace_mutex); |