aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
author Markus Metzger <markus.t.metzger@intel.com> 2008-12-11 07:49:59 -0500
committer Ingo Molnar <mingo@elte.hu> 2008-12-12 02:08:12 -0500
commit c2724775ce57c98b8af9694857b941dc61056516 (patch)
tree c3936699317da3233bc31e92d68cb582ec17d193 /arch/x86
parent b0884e25fe361f2ca228808fb5fd1b74cb04e711 (diff)
x86, bts: provide in-kernel branch-trace interface
Impact: cleanup

Move the BTS bits from ptrace.c into ds.c.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r-- arch/x86/include/asm/ds.h 241
-rw-r--r-- arch/x86/include/asm/processor.h 13
-rw-r--r-- arch/x86/include/asm/ptrace.h 36
-rw-r--r-- arch/x86/include/asm/thread_info.h 5
-rw-r--r-- arch/x86/kernel/cpu/intel.c 4
-rw-r--r-- arch/x86/kernel/ds.c 857
-rw-r--r-- arch/x86/kernel/process_32.c 59
-rw-r--r-- arch/x86/kernel/process_64.c 50
-rw-r--r-- arch/x86/kernel/ptrace.c 416
9 files changed, 810 insertions, 871 deletions
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 99b6c39774a4..ee0ea3a96c11 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -6,13 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done) 10 * - buffer overflow handling (to be done)
11 * - buffer access 11 * - buffer access
12 * 12 *
13 * It assumes: 13 * It does not do:
14 * - get_task_struct on all traced tasks 14 * - security checking (is the caller allowed to trace the task)
15 * - current is allowed to trace tasks 15 * - buffer allocation (memory accounting)
16 * 16 *
17 * 17 *
18 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -31,6 +31,7 @@
31#ifdef CONFIG_X86_DS 31#ifdef CONFIG_X86_DS
32 32
33struct task_struct; 33struct task_struct;
34struct ds_context;
34struct ds_tracer; 35struct ds_tracer;
35struct bts_tracer; 36struct bts_tracer;
36struct pebs_tracer; 37struct pebs_tracer;
@@ -38,6 +39,38 @@ struct pebs_tracer;
38typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); 39typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
39typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); 40typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
40 41
42
43/*
44 * A list of features plus corresponding macros to talk about them in
45 * the ds_request function's flags parameter.
46 *
47 * We use the enum to index an array of corresponding control bits;
48 * we use the macro to index a flags bit-vector.
49 */
50enum ds_feature {
51 dsf_bts = 0,
52 dsf_bts_kernel,
53#define BTS_KERNEL (1 << dsf_bts_kernel)
54 /* trace kernel-mode branches */
55
56 dsf_bts_user,
57#define BTS_USER (1 << dsf_bts_user)
58 /* trace user-mode branches */
59
60 dsf_bts_overflow,
61 dsf_bts_max,
62 dsf_pebs = dsf_bts_max,
63
64 dsf_pebs_max,
65 dsf_ctl_max = dsf_pebs_max,
66 dsf_bts_timestamps = dsf_ctl_max,
67#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
68 /* add timestamps into BTS trace */
69
70#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
71};
72
73
41/* 74/*
42 * Request BTS or PEBS 75 * Request BTS or PEBS
43 * 76 *
@@ -58,92 +91,135 @@ typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
58 * NULL if cyclic buffer requested 91 * NULL if cyclic buffer requested
59 * th: the interrupt threshold in records from the end of the buffer; 92 * th: the interrupt threshold in records from the end of the buffer;
60 * -1 if no interrupt threshold is requested. 93 * -1 if no interrupt threshold is requested.
94 * flags: a bit-mask of the above flags
61 */ 95 */
62extern struct bts_tracer *ds_request_bts(struct task_struct *task, 96extern struct bts_tracer *ds_request_bts(struct task_struct *task,
63 void *base, size_t size, 97 void *base, size_t size,
64 bts_ovfl_callback_t ovfl, size_t th); 98 bts_ovfl_callback_t ovfl,
99 size_t th, unsigned int flags);
65extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, 100extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
66 void *base, size_t size, 101 void *base, size_t size,
67 pebs_ovfl_callback_t ovfl, 102 pebs_ovfl_callback_t ovfl,
68 size_t th); 103 size_t th, unsigned int flags);
69 104
70/* 105/*
71 * Release BTS or PEBS resources 106 * Release BTS or PEBS resources
72 * 107 * Suspend and resume BTS or PEBS tracing
73 * Returns 0 on success; -Eerrno otherwise
74 * 108 *
75 * tracer: the tracer handle returned from ds_request_~() 109 * tracer: the tracer handle returned from ds_request_~()
76 */ 110 */
77extern int ds_release_bts(struct bts_tracer *tracer); 111extern void ds_release_bts(struct bts_tracer *tracer);
78extern int ds_release_pebs(struct pebs_tracer *tracer); 112extern void ds_suspend_bts(struct bts_tracer *tracer);
113extern void ds_resume_bts(struct bts_tracer *tracer);
114extern void ds_release_pebs(struct pebs_tracer *tracer);
115extern void ds_suspend_pebs(struct pebs_tracer *tracer);
116extern void ds_resume_pebs(struct pebs_tracer *tracer);
117
79 118
80/* 119/*
81 * Get the (array) index of the write pointer. 120 * The raw DS buffer state as it is used for BTS and PEBS recording.
82 * (assuming an array of BTS/PEBS records)
83 *
84 * Returns 0 on success; -Eerrno on error
85 * 121 *
86 * tracer: the tracer handle returned from ds_request_~() 122 * This is the low-level, arch-dependent interface for working
87 * pos (out): will hold the result 123 * directly on the raw trace data.
88 */ 124 */
89extern int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos); 125struct ds_trace {
90extern int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos); 126 /* the number of bts/pebs records */
127 size_t n;
128 /* the size of a bts/pebs record in bytes */
129 size_t size;
130 /* pointers into the raw buffer:
131 - to the first entry */
132 void *begin;
133 /* - one beyond the last entry */
134 void *end;
135 /* - one beyond the newest entry */
136 void *top;
137 /* - the interrupt threshold */
138 void *ith;
139 /* flags given on ds_request() */
140 unsigned int flags;
141};
91 142
92/* 143/*
93 * Get the (array) index one record beyond the end of the array. 144 * An arch-independent view on branch trace data.
94 * (assuming an array of BTS/PEBS records)
95 *
96 * Returns 0 on success; -Eerrno on error
97 *
98 * tracer: the tracer handle returned from ds_request_~()
99 * pos (out): will hold the result
100 */ 145 */
101extern int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos); 146enum bts_qualifier {
102extern int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos); 147 bts_invalid,
148#define BTS_INVALID bts_invalid
149
150 bts_branch,
151#define BTS_BRANCH bts_branch
152
153 bts_task_arrives,
154#define BTS_TASK_ARRIVES bts_task_arrives
155
156 bts_task_departs,
157#define BTS_TASK_DEPARTS bts_task_departs
158
159 bts_qual_bit_size = 4,
160 bts_qual_max = (1 << bts_qual_bit_size),
161};
162
163struct bts_struct {
164 __u64 qualifier;
165 union {
166 /* BTS_BRANCH */
167 struct {
168 __u64 from;
169 __u64 to;
170 } lbr;
171 /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
172 struct {
173 __u64 jiffies;
174 pid_t pid;
175 } timestamp;
176 } variant;
177};
178
103 179
104/* 180/*
105 * Provide a pointer to the BTS/PEBS record at parameter index. 181 * The BTS state.
106 * (assuming an array of BTS/PEBS records)
107 *
108 * The pointer points directly into the buffer. The user is
109 * responsible for copying the record.
110 *
111 * Returns the size of a single record on success; -Eerrno on error
112 * 182 *
113 * tracer: the tracer handle returned from ds_request_~() 183 * This gives access to the raw DS state and adds functions to provide
114 * index: the index of the requested record 184 * an arch-independent view of the BTS data.
115 * record (out): pointer to the requested record
116 */ 185 */
117extern int ds_access_bts(struct bts_tracer *tracer, 186struct bts_trace {
118 size_t index, const void **record); 187 struct ds_trace ds;
119extern int ds_access_pebs(struct pebs_tracer *tracer, 188
120 size_t index, const void **record); 189 int (*read)(struct bts_tracer *tracer, const void *at,
190 struct bts_struct *out);
191 int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
192};
193
121 194
122/* 195/*
123 * Write one or more BTS/PEBS records at the write pointer index and 196 * The PEBS state.
124 * advance the write pointer.
125 * 197 *
126 * If size is not a multiple of the record size, trailing bytes are 198 * This gives access to the raw DS state and the PEBS-specific counter
127 * zeroed out. 199 * reset value.
128 * 200 */
129 * May result in one or more overflow notifications. 201struct pebs_trace {
130 * 202 struct ds_trace ds;
131 * If called during overflow handling, that is, with index >= 203
132 * interrupt threshold, the write will wrap around. 204 /* the PEBS reset value */
205 unsigned long long reset_value;
206};
207
208
209/*
210 * Read the BTS or PEBS trace.
133 * 211 *
134 * An overflow notification is given if and when the interrupt 212 * Returns a view on the trace collected for the parameter tracer.
135 * threshold is reached during or after the write.
136 * 213 *
137 * Returns the number of bytes written or -Eerrno. 214 * The view remains valid as long as the traced task is not running or
215 * the tracer is suspended.
216 * Writes into the trace buffer are not reflected.
138 * 217 *
139 * tracer: the tracer handle returned from ds_request_~() 218 * tracer: the tracer handle returned from ds_request_~()
140 * buffer: the buffer to write
141 * size: the size of the buffer
142 */ 219 */
143extern int ds_write_bts(struct bts_tracer *tracer, 220extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
144 const void *buffer, size_t size); 221extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
145extern int ds_write_pebs(struct pebs_tracer *tracer, 222
146 const void *buffer, size_t size);
147 223
148/* 224/*
149 * Reset the write pointer of the BTS/PEBS buffer. 225 * Reset the write pointer of the BTS/PEBS buffer.
@@ -156,27 +232,6 @@ extern int ds_reset_bts(struct bts_tracer *tracer);
156extern int ds_reset_pebs(struct pebs_tracer *tracer); 232extern int ds_reset_pebs(struct pebs_tracer *tracer);
157 233
158/* 234/*
159 * Clear the BTS/PEBS buffer and reset the write pointer.
160 * The entire buffer will be zeroed out.
161 *
162 * Returns 0 on success; -Eerrno on error
163 *
164 * tracer: the tracer handle returned from ds_request_~()
165 */
166extern int ds_clear_bts(struct bts_tracer *tracer);
167extern int ds_clear_pebs(struct pebs_tracer *tracer);
168
169/*
170 * Provide the PEBS counter reset value.
171 *
172 * Returns 0 on success; -Eerrno on error
173 *
174 * tracer: the tracer handle returned from ds_request_pebs()
175 * value (out): the counter reset value
176 */
177extern int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value);
178
179/*
180 * Set the PEBS counter reset value. 235 * Set the PEBS counter reset value.
181 * 236 *
182 * Returns 0 on success; -Eerrno on error 237 * Returns 0 on success; -Eerrno on error
@@ -192,35 +247,17 @@ extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
192struct cpuinfo_x86; 247struct cpuinfo_x86;
193extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); 248extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
194 249
195
196
197/* 250/*
198 * The DS context - part of struct thread_struct. 251 * Context switch work
199 */ 252 */
200#define MAX_SIZEOF_DS (12 * 8) 253extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
201
202struct ds_context {
203 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
204 unsigned char ds[MAX_SIZEOF_DS];
205 /* the owner of the BTS and PEBS configuration, respectively */
206 struct ds_tracer *owner[2];
207 /* use count */
208 unsigned long count;
209 /* a pointer to the context location inside the thread_struct
210 * or the per_cpu context array */
211 struct ds_context **this;
212 /* a pointer to the task owning this context, or NULL, if the
213 * context is owned by a cpu */
214 struct task_struct *task;
215};
216
217/* called by exit_thread() to free leftover contexts */
218extern void ds_free(struct ds_context *context);
219 254
220#else /* CONFIG_X86_DS */ 255#else /* CONFIG_X86_DS */
221 256
222struct cpuinfo_x86; 257struct cpuinfo_x86;
223static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} 258static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
259static inline void ds_switch_to(struct task_struct *prev,
260 struct task_struct *next) {}
224 261
225#endif /* CONFIG_X86_DS */ 262#endif /* CONFIG_X86_DS */
226#endif /* _ASM_X86_DS_H */ 263#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5ca01e383269..aa5914f8e501 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -752,6 +752,19 @@ extern void switch_to_new_gdt(void);
752extern void cpu_init(void); 752extern void cpu_init(void);
753extern void init_gdt(int cpu); 753extern void init_gdt(int cpu);
754 754
755static inline unsigned long get_debugctlmsr(void)
756{
757 unsigned long debugctlmsr = 0;
758
759#ifndef CONFIG_X86_DEBUGCTLMSR
760 if (boot_cpu_data.x86 < 6)
761 return 0;
762#endif
763 rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
764
765 return debugctlmsr;
766}
767
755static inline void update_debugctlmsr(unsigned long debugctlmsr) 768static inline void update_debugctlmsr(unsigned long debugctlmsr)
756{ 769{
757#ifndef CONFIG_X86_DEBUGCTLMSR 770#ifndef CONFIG_X86_DEBUGCTLMSR
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index eefb0594b058..fbf744215911 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,6 @@
6#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7 7
8#ifdef __KERNEL__ 8#ifdef __KERNEL__
9#include <asm/ds.h> /* the DS BTS struct is used for ptrace too */
10#include <asm/segment.h> 9#include <asm/segment.h>
11#endif 10#endif
12 11
@@ -128,34 +127,6 @@ struct pt_regs {
128#endif /* !__i386__ */ 127#endif /* !__i386__ */
129 128
130 129
131#ifdef CONFIG_X86_PTRACE_BTS
132/* a branch trace record entry
133 *
134 * In order to unify the interface between various processor versions,
135 * we use the below data structure for all processors.
136 */
137enum bts_qualifier {
138 BTS_INVALID = 0,
139 BTS_BRANCH,
140 BTS_TASK_ARRIVES,
141 BTS_TASK_DEPARTS
142};
143
144struct bts_struct {
145 __u64 qualifier;
146 union {
147 /* BTS_BRANCH */
148 struct {
149 __u64 from_ip;
150 __u64 to_ip;
151 } lbr;
152 /* BTS_TASK_ARRIVES or
153 BTS_TASK_DEPARTS */
154 __u64 jiffies;
155 } variant;
156};
157#endif /* CONFIG_X86_PTRACE_BTS */
158
159#ifdef __KERNEL__ 130#ifdef __KERNEL__
160 131
161#include <linux/init.h> 132#include <linux/init.h>
@@ -163,13 +134,6 @@ struct bts_struct {
163struct cpuinfo_x86; 134struct cpuinfo_x86;
164struct task_struct; 135struct task_struct;
165 136
166#ifdef CONFIG_X86_PTRACE_BTS
167extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *);
168extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier);
169#else
170#define ptrace_bts_init_intel(config) do {} while (0)
171#endif /* CONFIG_X86_PTRACE_BTS */
172
173extern unsigned long profile_pc(struct pt_regs *regs); 137extern unsigned long profile_pc(struct pt_regs *regs);
174 138
175extern unsigned long 139extern unsigned long
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0921b4018c11..bf8113d16a33 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -93,7 +93,6 @@ struct thread_info {
93#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ 93#define TIF_FORCED_TF 24 /* true if TF in eflags artificially */
94#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ 94#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */
95#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ 95#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */
96#define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */
97 96
98#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) 97#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
99#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) 98#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -115,7 +114,6 @@ struct thread_info {
115#define _TIF_FORCED_TF (1 << TIF_FORCED_TF) 114#define _TIF_FORCED_TF (1 << TIF_FORCED_TF)
116#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) 115#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR)
117#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) 116#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR)
118#define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS)
119 117
120/* work to do in syscall_trace_enter() */ 118/* work to do in syscall_trace_enter() */
121#define _TIF_WORK_SYSCALL_ENTRY \ 119#define _TIF_WORK_SYSCALL_ENTRY \
@@ -141,8 +139,7 @@ struct thread_info {
141 139
142/* flags to check in __switch_to() */ 140/* flags to check in __switch_to() */
143#define _TIF_WORK_CTXSW \ 141#define _TIF_WORK_CTXSW \
144 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ 142 (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
145 _TIF_NOTSC)
146 143
147#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW 144#define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW
148#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) 145#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 816f27f289b1..cd413d9a0218 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -11,7 +11,6 @@
11#include <asm/pgtable.h> 11#include <asm/pgtable.h>
12#include <asm/msr.h> 12#include <asm/msr.h>
13#include <asm/uaccess.h> 13#include <asm/uaccess.h>
14#include <asm/ptrace.h>
15#include <asm/ds.h> 14#include <asm/ds.h>
16#include <asm/bugs.h> 15#include <asm/bugs.h>
17 16
@@ -309,9 +308,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
309 set_cpu_cap(c, X86_FEATURE_P3); 308 set_cpu_cap(c, X86_FEATURE_P3);
310#endif 309#endif
311 310
312 if (cpu_has_bts)
313 ptrace_bts_init_intel(c);
314
315 detect_extended_topology(c); 311 detect_extended_topology(c);
316 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { 312 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
317 /* 313 /*
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 095306988667..f0583005b75e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,13 +6,13 @@
6 * precise-event based sampling (PEBS). 6 * precise-event based sampling (PEBS).
7 * 7 *
8 * It manages: 8 * It manages:
9 * - per-thread and per-cpu allocation of BTS and PEBS 9 * - DS and BTS hardware configuration
10 * - buffer overflow handling (to be done) 10 * - buffer overflow handling (to be done)
11 * - buffer access 11 * - buffer access
12 * 12 *
13 * It assumes: 13 * It does not do:
14 * - get_task_struct on all traced tasks 14 * - security checking (is the caller allowed to trace the task)
15 * - current is allowed to trace tasks 15 * - buffer allocation (memory accounting)
16 * 16 *
17 * 17 *
18 * Copyright (C) 2007-2008 Intel Corporation. 18 * Copyright (C) 2007-2008 Intel Corporation.
@@ -34,15 +34,30 @@
34 * The configuration for a particular DS hardware implementation. 34 * The configuration for a particular DS hardware implementation.
35 */ 35 */
36struct ds_configuration { 36struct ds_configuration {
37 /* the size of the DS structure in bytes */ 37 /* the name of the configuration */
38 unsigned char sizeof_ds; 38 const char *name;
39 /* the size of one pointer-typed field in the DS structure in bytes; 39 /* the size of one pointer-typed field in the DS structure and
40 this covers the first 8 fields related to buffer management. */ 40 in the BTS and PEBS buffers in bytes;
41 this covers the first 8 DS fields related to buffer management. */
41 unsigned char sizeof_field; 42 unsigned char sizeof_field;
42 /* the size of a BTS/PEBS record in bytes */ 43 /* the size of a BTS/PEBS record in bytes */
43 unsigned char sizeof_rec[2]; 44 unsigned char sizeof_rec[2];
45 /* a series of bit-masks to control various features indexed
46 * by enum ds_feature */
47 unsigned long ctl[dsf_ctl_max];
44}; 48};
45static struct ds_configuration ds_cfg; 49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
50
51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
52
53#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
54#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
55#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56
57#define BTS_CONTROL \
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
59 ds_cfg.ctl[dsf_bts_overflow])
60
46 61
47/* 62/*
48 * A BTS or PEBS tracer. 63 * A BTS or PEBS tracer.
@@ -61,6 +76,8 @@ struct ds_tracer {
61struct bts_tracer { 76struct bts_tracer {
62 /* the common DS part */ 77 /* the common DS part */
63 struct ds_tracer ds; 78 struct ds_tracer ds;
79 /* the trace including the DS configuration */
80 struct bts_trace trace;
64 /* buffer overflow notification function */ 81 /* buffer overflow notification function */
65 bts_ovfl_callback_t ovfl; 82 bts_ovfl_callback_t ovfl;
66}; 83};
@@ -68,6 +85,8 @@ struct bts_tracer {
68struct pebs_tracer { 85struct pebs_tracer {
69 /* the common DS part */ 86 /* the common DS part */
70 struct ds_tracer ds; 87 struct ds_tracer ds;
88 /* the trace including the DS configuration */
89 struct pebs_trace trace;
71 /* buffer overflow notification function */ 90 /* buffer overflow notification function */
72 pebs_ovfl_callback_t ovfl; 91 pebs_ovfl_callback_t ovfl;
73}; 92};
@@ -134,13 +153,11 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
134 (*(unsigned long *)base) = value; 153 (*(unsigned long *)base) = value;
135} 154}
136 155
137#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
138
139 156
140/* 157/*
141 * Locking is done only for allocating BTS or PEBS resources. 158 * Locking is done only for allocating BTS or PEBS resources.
142 */ 159 */
143static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); 160static DEFINE_SPINLOCK(ds_lock);
144 161
145 162
146/* 163/*
@@ -156,27 +173,32 @@ static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
156 * >0 number of per-thread tracers 173 * >0 number of per-thread tracers
157 * <0 number of per-cpu tracers 174 * <0 number of per-cpu tracers
158 * 175 *
159 * The below functions to get and put tracers and to check the
160 * allocation type require the ds_lock to be held by the caller.
161 *
162 * Tracers essentially gives the number of ds contexts for a certain 176 * Tracers essentially gives the number of ds contexts for a certain
163 * type of allocation. 177 * type of allocation.
164 */ 178 */
165static long tracers; 179static atomic_t tracers = ATOMIC_INIT(0);
166 180
167static inline void get_tracer(struct task_struct *task) 181static inline void get_tracer(struct task_struct *task)
168{ 182{
169 tracers += (task ? 1 : -1); 183 if (task)
184 atomic_inc(&tracers);
185 else
186 atomic_dec(&tracers);
170} 187}
171 188
172static inline void put_tracer(struct task_struct *task) 189static inline void put_tracer(struct task_struct *task)
173{ 190{
174 tracers -= (task ? 1 : -1); 191 if (task)
192 atomic_dec(&tracers);
193 else
194 atomic_inc(&tracers);
175} 195}
176 196
177static inline int check_tracer(struct task_struct *task) 197static inline int check_tracer(struct task_struct *task)
178{ 198{
179 return (task ? (tracers >= 0) : (tracers <= 0)); 199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
180} 202}
181 203
182 204
@@ -190,14 +212,30 @@ static inline int check_tracer(struct task_struct *task)
190 * Contexts are use-counted. They are allocated on first access and 212 * Contexts are use-counted. They are allocated on first access and
191 * deallocated when the last user puts the context. 213 * deallocated when the last user puts the context.
192 */ 214 */
193static DEFINE_PER_CPU(struct ds_context *, system_context); 215struct ds_context {
216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
217 unsigned char ds[MAX_SIZEOF_DS];
218 /* the owner of the BTS and PEBS configuration, respectively */
219 struct bts_tracer *bts_master;
220 struct pebs_tracer *pebs_master;
221 /* use count */
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230
231static DEFINE_PER_CPU(struct ds_context *, system_context_array);
194 232
195#define this_system_context per_cpu(system_context, smp_processor_id()) 233#define system_context per_cpu(system_context_array, smp_processor_id())
196 234
197static inline struct ds_context *ds_get_context(struct task_struct *task) 235static inline struct ds_context *ds_get_context(struct task_struct *task)
198{ 236{
199 struct ds_context **p_context = 237 struct ds_context **p_context =
200 (task ? &task->thread.ds_ctx : &this_system_context); 238 (task ? &task->thread.ds_ctx : &system_context);
201 struct ds_context *context = *p_context; 239 struct ds_context *context = *p_context;
202 unsigned long irq; 240 unsigned long irq;
203 241
@@ -225,10 +263,22 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
225 wrmsrl(MSR_IA32_DS_AREA, 263 wrmsrl(MSR_IA32_DS_AREA,
226 (unsigned long)context->ds); 264 (unsigned long)context->ds);
227 } 265 }
266
267 context->count++;
268
269 spin_unlock_irqrestore(&ds_lock, irq);
270 } else {
271 spin_lock_irqsave(&ds_lock, irq);
272
273 context = *p_context;
274 if (context)
275 context->count++;
276
228 spin_unlock_irqrestore(&ds_lock, irq); 277 spin_unlock_irqrestore(&ds_lock, irq);
229 }
230 278
231 context->count++; 279 if (!context)
280 context = ds_get_context(task);
281 }
232 282
233 return context; 283 return context;
234} 284}
@@ -242,8 +292,10 @@ static inline void ds_put_context(struct ds_context *context)
242 292
243 spin_lock_irqsave(&ds_lock, irq); 293 spin_lock_irqsave(&ds_lock, irq);
244 294
245 if (--context->count) 295 if (--context->count) {
246 goto out; 296 spin_unlock_irqrestore(&ds_lock, irq);
297 return;
298 }
247 299
248 *(context->this) = NULL; 300 *(context->this) = NULL;
249 301
@@ -253,14 +305,14 @@ static inline void ds_put_context(struct ds_context *context)
253 if (!context->task || (context->task == current)) 305 if (!context->task || (context->task == current))
254 wrmsrl(MSR_IA32_DS_AREA, 0); 306 wrmsrl(MSR_IA32_DS_AREA, 0);
255 307
256 kfree(context);
257 out:
258 spin_unlock_irqrestore(&ds_lock, irq); 308 spin_unlock_irqrestore(&ds_lock, irq);
309
310 kfree(context);
259} 311}
260 312
261 313
262/* 314/*
263 * Handle a buffer overflow 315 * Call the tracer's callback on a buffer overflow.
264 * 316 *
265 * context: the ds context 317 * context: the ds context
266 * qual: the buffer type 318 * qual: the buffer type
@@ -268,30 +320,244 @@ static inline void ds_put_context(struct ds_context *context)
268static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) 320static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
269{ 321{
270 switch (qual) { 322 switch (qual) {
271 case ds_bts: { 323 case ds_bts:
272 struct bts_tracer *tracer = 324 if (context->bts_master &&
273 container_of(context->owner[qual], 325 context->bts_master->ovfl)
274 struct bts_tracer, ds); 326 context->bts_master->ovfl(context->bts_master);
275 if (tracer->ovfl) 327 break;
276 tracer->ovfl(tracer); 328 case ds_pebs:
277 } 329 if (context->pebs_master &&
330 context->pebs_master->ovfl)
331 context->pebs_master->ovfl(context->pebs_master);
278 break; 332 break;
279 case ds_pebs: {
280 struct pebs_tracer *tracer =
281 container_of(context->owner[qual],
282 struct pebs_tracer, ds);
283 if (tracer->ovfl)
284 tracer->ovfl(tracer);
285 } 333 }
334}
335
336
337/*
338 * Write raw data into the BTS or PEBS buffer.
339 *
340 * The remainder of any partially written record is zeroed out.
341 *
342 * context: the DS context
343 * qual: the buffer type
344 * record: the data to write
345 * size: the size of the data
346 */
347static int ds_write(struct ds_context *context, enum ds_qualifier qual,
348 const void *record, size_t size)
349{
350 int bytes_written = 0;
351
352 if (!record)
353 return -EINVAL;
354
355 while (size) {
356 unsigned long base, index, end, write_end, int_th;
357 unsigned long write_size, adj_write_size;
358
359 /*
360 * write as much as possible without producing an
361 * overflow interrupt.
362 *
363 * interrupt_threshold must either be
364 * - bigger than absolute_maximum or
365 * - point to a record between buffer_base and absolute_maximum
366 *
367 * index points to a valid record.
368 */
369 base = ds_get(context->ds, qual, ds_buffer_base);
370 index = ds_get(context->ds, qual, ds_index);
371 end = ds_get(context->ds, qual, ds_absolute_maximum);
372 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
373
374 write_end = min(end, int_th);
375
376 /* if we are already beyond the interrupt threshold,
377 * we fill the entire buffer */
378 if (write_end <= index)
379 write_end = end;
380
381 if (write_end <= index)
382 break;
383
384 write_size = min((unsigned long) size, write_end - index);
385 memcpy((void *)index, record, write_size);
386
387 record = (const char *)record + write_size;
388 size -= write_size;
389 bytes_written += write_size;
390
391 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
392 adj_write_size *= ds_cfg.sizeof_rec[qual];
393
394 /* zero out trailing bytes */
395 memset((char *)index + write_size, 0,
396 adj_write_size - write_size);
397 index += adj_write_size;
398
399 if (index >= end)
400 index = base;
401 ds_set(context->ds, qual, ds_index, index);
402
403 if (index >= int_th)
404 ds_overflow(context, qual);
405 }
406
407 return bytes_written;
408}
409
410
411/*
412 * Branch Trace Store (BTS) uses the following format. Different
413 * architectures vary in the size of those fields.
414 * - source linear address
415 * - destination linear address
416 * - flags
417 *
418 * Later architectures use 64bit pointers throughout, whereas earlier
419 * architectures use 32bit pointers in 32bit mode.
420 *
421 * We compute the base address for the first 8 fields based on:
422 * - the field size stored in the DS configuration
423 * - the relative field position
424 *
425 * In order to store additional information in the BTS buffer, we use
426 * a special source address to indicate that the record requires
427 * special interpretation.
428 *
429 * Netburst indicated via a bit in the flags field whether the branch
430 * was predicted; this is ignored.
431 *
432 * We use two levels of abstraction:
433 * - the raw data level defined here
434 * - an arch-independent level defined in ds.h
435 */
436
437enum bts_field {
438 bts_from,
439 bts_to,
440 bts_flags,
441
442 bts_qual = bts_from,
443 bts_jiffies = bts_to,
444 bts_pid = bts_flags,
445
446 bts_qual_mask = (bts_qual_max - 1),
447 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
448};
449
450static inline unsigned long bts_get(const char *base, enum bts_field field)
451{
452 base += (ds_cfg.sizeof_field * field);
453 return *(unsigned long *)base;
454}
455
456static inline void bts_set(char *base, enum bts_field field, unsigned long val)
457{
458 base += (ds_cfg.sizeof_field * field);;
459 (*(unsigned long *)base) = val;
460}
461
462
463/*
464 * The raw BTS data is architecture dependent.
465 *
466 * For higher-level users, we give an arch-independent view.
467 * - ds.h defines struct bts_struct
468 * - bts_read translates one raw bts record into a bts_struct
469 * - bts_write translates one bts_struct into the raw format and
470 * writes it into the top of the parameter tracer's buffer.
471 *
472 * return: bytes read/written on success; -Eerrno, otherwise
473 */
474static int bts_read(struct bts_tracer *tracer, const void *at,
475 struct bts_struct *out)
476{
477 if (!tracer)
478 return -EINVAL;
479
480 if (at < tracer->trace.ds.begin)
481 return -EINVAL;
482
483 if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
484 return -EINVAL;
485
486 memset(out, 0, sizeof(*out));
487 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
488 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
489 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
490 out->variant.timestamp.pid = bts_get(at, bts_pid);
491 } else {
492 out->qualifier = bts_branch;
493 out->variant.lbr.from = bts_get(at, bts_from);
494 out->variant.lbr.to = bts_get(at, bts_to);
495 }
496
497 return ds_cfg.sizeof_rec[ds_bts];
498}
499
500static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
501{
502 unsigned char raw[MAX_SIZEOF_BTS];
503
504 if (!tracer)
505 return -EINVAL;
506
507 if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
508 return -EOVERFLOW;
509
510 switch (in->qualifier) {
511 case bts_invalid:
512 bts_set(raw, bts_from, 0);
513 bts_set(raw, bts_to, 0);
514 bts_set(raw, bts_flags, 0);
515 break;
516 case bts_branch:
517 bts_set(raw, bts_from, in->variant.lbr.from);
518 bts_set(raw, bts_to, in->variant.lbr.to);
519 bts_set(raw, bts_flags, 0);
520 break;
521 case bts_task_arrives:
522 case bts_task_departs:
523 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
524 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
525 bts_set(raw, bts_pid, in->variant.timestamp.pid);
286 break; 526 break;
527 default:
528 return -EINVAL;
287 } 529 }
530
531 return ds_write(tracer->ds.context, ds_bts, raw,
532 ds_cfg.sizeof_rec[ds_bts]);
288} 533}
289 534
290 535
291static void ds_install_ds_config(struct ds_context *context, 536static void ds_write_config(struct ds_context *context,
292 enum ds_qualifier qual, 537 struct ds_trace *cfg, enum ds_qualifier qual)
293 void *base, size_t size, size_t ith) 538{
539 unsigned char *ds = context->ds;
540
541 ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
542 ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
543 ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
544 ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
545}
546
547static void ds_read_config(struct ds_context *context,
548 struct ds_trace *cfg, enum ds_qualifier qual)
294{ 549{
550 unsigned char *ds = context->ds;
551
552 cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
553 cfg->top = (void *)ds_get(ds, qual, ds_index);
554 cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
555 cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
556}
557
558static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
559 void *base, size_t size, size_t ith,
560 unsigned int flags) {
295 unsigned long buffer, adj; 561 unsigned long buffer, adj;
296 562
297 /* adjust the buffer address and size to meet alignment 563 /* adjust the buffer address and size to meet alignment
@@ -308,32 +574,30 @@ static void ds_install_ds_config(struct ds_context *context,
308 buffer += adj; 574 buffer += adj;
309 size -= adj; 575 size -= adj;
310 576
311 size /= ds_cfg.sizeof_rec[qual]; 577 trace->n = size / ds_cfg.sizeof_rec[qual];
312 size *= ds_cfg.sizeof_rec[qual]; 578 trace->size = ds_cfg.sizeof_rec[qual];
313 579
314 ds_set(context->ds, qual, ds_buffer_base, buffer); 580 size = (trace->n * trace->size);
315 ds_set(context->ds, qual, ds_index, buffer);
316 ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
317 581
582 trace->begin = (void *)buffer;
583 trace->top = trace->begin;
584 trace->end = (void *)(buffer + size);
318 /* The value for 'no threshold' is -1, which will set the 585 /* The value for 'no threshold' is -1, which will set the
319 * threshold outside of the buffer, just like we want it. 586 * threshold outside of the buffer, just like we want it.
320 */ 587 */
321 ds_set(context->ds, qual, 588 trace->ith = (void *)(buffer + size - ith);
322 ds_interrupt_threshold, buffer + size - ith); 589
590 trace->flags = flags;
323} 591}
324 592
325static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual, 593
326 struct task_struct *task, 594static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
327 void *base, size_t size, size_t th) 595 enum ds_qualifier qual, struct task_struct *task,
596 void *base, size_t size, size_t th, unsigned int flags)
328{ 597{
329 struct ds_context *context; 598 struct ds_context *context;
330 unsigned long irq;
331 int error; 599 int error;
332 600
333 error = -EOPNOTSUPP;
334 if (!ds_cfg.sizeof_ds)
335 goto out;
336
337 error = -EINVAL; 601 error = -EINVAL;
338 if (!base) 602 if (!base)
339 goto out; 603 goto out;
@@ -360,43 +624,26 @@ static int ds_request(struct ds_tracer *tracer, enum ds_qualifier qual,
360 goto out; 624 goto out;
361 tracer->context = context; 625 tracer->context = context;
362 626
627 ds_init_ds_trace(trace, qual, base, size, th, flags);
363 628
364 spin_lock_irqsave(&ds_lock, irq); 629 error = 0;
365
366 error = -EPERM;
367 if (!check_tracer(task))
368 goto out_unlock;
369 get_tracer(task);
370
371 error = -EPERM;
372 if (context->owner[qual])
373 goto out_put_tracer;
374 context->owner[qual] = tracer;
375
376 spin_unlock_irqrestore(&ds_lock, irq);
377
378
379 ds_install_ds_config(context, qual, base, size, th);
380
381 return 0;
382
383 out_put_tracer:
384 put_tracer(task);
385 out_unlock:
386 spin_unlock_irqrestore(&ds_lock, irq);
387 ds_put_context(context);
388 tracer->context = NULL;
389 out: 630 out:
390 return error; 631 return error;
391} 632}
392 633
393struct bts_tracer *ds_request_bts(struct task_struct *task, 634struct bts_tracer *ds_request_bts(struct task_struct *task,
394 void *base, size_t size, 635 void *base, size_t size,
395 bts_ovfl_callback_t ovfl, size_t th) 636 bts_ovfl_callback_t ovfl, size_t th,
637 unsigned int flags)
396{ 638{
397 struct bts_tracer *tracer; 639 struct bts_tracer *tracer;
640 unsigned long irq;
398 int error; 641 int error;
399 642
643 error = -EOPNOTSUPP;
644 if (!ds_cfg.ctl[dsf_bts])
645 goto out;
646
400 /* buffer overflow notification is not yet implemented */ 647 /* buffer overflow notification is not yet implemented */
401 error = -EOPNOTSUPP; 648 error = -EOPNOTSUPP;
402 if (ovfl) 649 if (ovfl)
@@ -408,12 +655,40 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
408 goto out; 655 goto out;
409 tracer->ovfl = ovfl; 656 tracer->ovfl = ovfl;
410 657
411 error = ds_request(&tracer->ds, ds_bts, task, base, size, th); 658 error = ds_request(&tracer->ds, &tracer->trace.ds,
659 ds_bts, task, base, size, th, flags);
412 if (error < 0) 660 if (error < 0)
413 goto out_tracer; 661 goto out_tracer;
414 662
663
664 spin_lock_irqsave(&ds_lock, irq);
665
666 error = -EPERM;
667 if (!check_tracer(task))
668 goto out_unlock;
669 get_tracer(task);
670
671 error = -EPERM;
672 if (tracer->ds.context->bts_master)
673 goto out_put_tracer;
674 tracer->ds.context->bts_master = tracer;
675
676 spin_unlock_irqrestore(&ds_lock, irq);
677
678
679 tracer->trace.read = bts_read;
680 tracer->trace.write = bts_write;
681
682 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
683 ds_resume_bts(tracer);
684
415 return tracer; 685 return tracer;
416 686
687 out_put_tracer:
688 put_tracer(task);
689 out_unlock:
690 spin_unlock_irqrestore(&ds_lock, irq);
691 ds_put_context(tracer->ds.context);
417 out_tracer: 692 out_tracer:
418 kfree(tracer); 693 kfree(tracer);
419 out: 694 out:
@@ -422,9 +697,11 @@ struct bts_tracer *ds_request_bts(struct task_struct *task,
422 697
423struct pebs_tracer *ds_request_pebs(struct task_struct *task, 698struct pebs_tracer *ds_request_pebs(struct task_struct *task,
424 void *base, size_t size, 699 void *base, size_t size,
425 pebs_ovfl_callback_t ovfl, size_t th) 700 pebs_ovfl_callback_t ovfl, size_t th,
701 unsigned int flags)
426{ 702{
427 struct pebs_tracer *tracer; 703 struct pebs_tracer *tracer;
704 unsigned long irq;
428 int error; 705 int error;
429 706
430 /* buffer overflow notification is not yet implemented */ 707 /* buffer overflow notification is not yet implemented */
@@ -438,300 +715,171 @@ struct pebs_tracer *ds_request_pebs(struct task_struct *task,
438 goto out; 715 goto out;
439 tracer->ovfl = ovfl; 716 tracer->ovfl = ovfl;
440 717
441 error = ds_request(&tracer->ds, ds_pebs, task, base, size, th); 718 error = ds_request(&tracer->ds, &tracer->trace.ds,
719 ds_pebs, task, base, size, th, flags);
442 if (error < 0) 720 if (error < 0)
443 goto out_tracer; 721 goto out_tracer;
444 722
723 spin_lock_irqsave(&ds_lock, irq);
724
725 error = -EPERM;
726 if (!check_tracer(task))
727 goto out_unlock;
728 get_tracer(task);
729
730 error = -EPERM;
731 if (tracer->ds.context->pebs_master)
732 goto out_put_tracer;
733 tracer->ds.context->pebs_master = tracer;
734
735 spin_unlock_irqrestore(&ds_lock, irq);
736
737 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
738 ds_resume_pebs(tracer);
739
445 return tracer; 740 return tracer;
446 741
742 out_put_tracer:
743 put_tracer(task);
744 out_unlock:
745 spin_unlock_irqrestore(&ds_lock, irq);
746 ds_put_context(tracer->ds.context);
447 out_tracer: 747 out_tracer:
448 kfree(tracer); 748 kfree(tracer);
449 out: 749 out:
450 return ERR_PTR(error); 750 return ERR_PTR(error);
451} 751}
452 752
453static void ds_release(struct ds_tracer *tracer, enum ds_qualifier qual) 753void ds_release_bts(struct bts_tracer *tracer)
454{
455 WARN_ON_ONCE(tracer->context->owner[qual] != tracer);
456 tracer->context->owner[qual] = NULL;
457
458 put_tracer(tracer->context->task);
459 ds_put_context(tracer->context);
460}
461
462int ds_release_bts(struct bts_tracer *tracer)
463{ 754{
464 if (!tracer) 755 if (!tracer)
465 return -EINVAL; 756 return;
466 757
467 ds_release(&tracer->ds, ds_bts); 758 ds_suspend_bts(tracer);
468 kfree(tracer);
469 759
470 return 0; 760 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
471} 761 tracer->ds.context->bts_master = NULL;
472 762
473int ds_release_pebs(struct pebs_tracer *tracer) 763 put_tracer(tracer->ds.context->task);
474{ 764 ds_put_context(tracer->ds.context);
475 if (!tracer)
476 return -EINVAL;
477 765
478 ds_release(&tracer->ds, ds_pebs);
479 kfree(tracer); 766 kfree(tracer);
480
481 return 0;
482}
483
484static size_t ds_get_index(struct ds_context *context, enum ds_qualifier qual)
485{
486 unsigned long base, index;
487
488 base = ds_get(context->ds, qual, ds_buffer_base);
489 index = ds_get(context->ds, qual, ds_index);
490
491 return (index - base) / ds_cfg.sizeof_rec[qual];
492} 767}
493 768
494int ds_get_bts_index(struct bts_tracer *tracer, size_t *pos) 769void ds_suspend_bts(struct bts_tracer *tracer)
495{ 770{
496 if (!tracer) 771 struct task_struct *task;
497 return -EINVAL;
498 772
499 if (!pos)
500 return -EINVAL;
501
502 *pos = ds_get_index(tracer->ds.context, ds_bts);
503
504 return 0;
505}
506
507int ds_get_pebs_index(struct pebs_tracer *tracer, size_t *pos)
508{
509 if (!tracer) 773 if (!tracer)
510 return -EINVAL; 774 return;
511 775
512 if (!pos) 776 task = tracer->ds.context->task;
513 return -EINVAL;
514 777
515 *pos = ds_get_index(tracer->ds.context, ds_pebs); 778 if (!task || (task == current))
779 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
516 780
517 return 0; 781 if (task) {
518} 782 task->thread.debugctlmsr &= ~BTS_CONTROL;
519 783
520static size_t ds_get_end(struct ds_context *context, enum ds_qualifier qual) 784 if (!task->thread.debugctlmsr)
521{ 785 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
522 unsigned long base, max; 786 }
523
524 base = ds_get(context->ds, qual, ds_buffer_base);
525 max = ds_get(context->ds, qual, ds_absolute_maximum);
526
527 return (max - base) / ds_cfg.sizeof_rec[qual];
528} 787}
529 788
530int ds_get_bts_end(struct bts_tracer *tracer, size_t *pos) 789void ds_resume_bts(struct bts_tracer *tracer)
531{ 790{
532 if (!tracer) 791 struct task_struct *task;
533 return -EINVAL; 792 unsigned long control;
534
535 if (!pos)
536 return -EINVAL;
537
538 *pos = ds_get_end(tracer->ds.context, ds_bts);
539
540 return 0;
541}
542 793
543int ds_get_pebs_end(struct pebs_tracer *tracer, size_t *pos)
544{
545 if (!tracer) 794 if (!tracer)
546 return -EINVAL; 795 return;
547
548 if (!pos)
549 return -EINVAL;
550
551 *pos = ds_get_end(tracer->ds.context, ds_pebs);
552
553 return 0;
554}
555
556static int ds_access(struct ds_context *context, enum ds_qualifier qual,
557 size_t index, const void **record)
558{
559 unsigned long base, idx;
560
561 if (!record)
562 return -EINVAL;
563
564 base = ds_get(context->ds, qual, ds_buffer_base);
565 idx = base + (index * ds_cfg.sizeof_rec[qual]);
566
567 if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
568 return -EINVAL;
569 796
570 *record = (const void *)idx; 797 task = tracer->ds.context->task;
571 798
572 return ds_cfg.sizeof_rec[qual]; 799 control = ds_cfg.ctl[dsf_bts];
573} 800 if (!(tracer->trace.ds.flags & BTS_KERNEL))
801 control |= ds_cfg.ctl[dsf_bts_kernel];
802 if (!(tracer->trace.ds.flags & BTS_USER))
803 control |= ds_cfg.ctl[dsf_bts_user];
574 804
575int ds_access_bts(struct bts_tracer *tracer, size_t index, 805 if (task) {
576 const void **record) 806 task->thread.debugctlmsr |= control;
577{ 807 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
578 if (!tracer) 808 }
579 return -EINVAL;
580 809
581 return ds_access(tracer->ds.context, ds_bts, index, record); 810 if (!task || (task == current))
811 update_debugctlmsr(get_debugctlmsr() | control);
582} 812}
583 813
584int ds_access_pebs(struct pebs_tracer *tracer, size_t index, 814void ds_release_pebs(struct pebs_tracer *tracer)
585 const void **record)
586{ 815{
587 if (!tracer) 816 if (!tracer)
588 return -EINVAL; 817 return;
589
590 return ds_access(tracer->ds.context, ds_pebs, index, record);
591}
592
593static int ds_write(struct ds_context *context, enum ds_qualifier qual,
594 const void *record, size_t size)
595{
596 int bytes_written = 0;
597
598 if (!record)
599 return -EINVAL;
600
601 while (size) {
602 unsigned long base, index, end, write_end, int_th;
603 unsigned long write_size, adj_write_size;
604
605 /*
606 * write as much as possible without producing an
607 * overflow interrupt.
608 *
609 * interrupt_threshold must either be
610 * - bigger than absolute_maximum or
611 * - point to a record between buffer_base and absolute_maximum
612 *
613 * index points to a valid record.
614 */
615 base = ds_get(context->ds, qual, ds_buffer_base);
616 index = ds_get(context->ds, qual, ds_index);
617 end = ds_get(context->ds, qual, ds_absolute_maximum);
618 int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
619
620 write_end = min(end, int_th);
621
622 /* if we are already beyond the interrupt threshold,
623 * we fill the entire buffer */
624 if (write_end <= index)
625 write_end = end;
626
627 if (write_end <= index)
628 break;
629
630 write_size = min((unsigned long) size, write_end - index);
631 memcpy((void *)index, record, write_size);
632
633 record = (const char *)record + write_size;
634 size -= write_size;
635 bytes_written += write_size;
636
637 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
638 adj_write_size *= ds_cfg.sizeof_rec[qual];
639
640 /* zero out trailing bytes */
641 memset((char *)index + write_size, 0,
642 adj_write_size - write_size);
643 index += adj_write_size;
644 818
645 if (index >= end) 819 ds_suspend_pebs(tracer);
646 index = base;
647 ds_set(context->ds, qual, ds_index, index);
648 820
649 if (index >= int_th) 821 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
650 ds_overflow(context, qual); 822 tracer->ds.context->pebs_master = NULL;
651 }
652 823
653 return bytes_written; 824 put_tracer(tracer->ds.context->task);
654} 825 ds_put_context(tracer->ds.context);
655 826
656int ds_write_bts(struct bts_tracer *tracer, const void *record, size_t size) 827 kfree(tracer);
657{
658 if (!tracer)
659 return -EINVAL;
660
661 return ds_write(tracer->ds.context, ds_bts, record, size);
662} 828}
663 829
664int ds_write_pebs(struct pebs_tracer *tracer, const void *record, size_t size) 830void ds_suspend_pebs(struct pebs_tracer *tracer)
665{ 831{
666 if (!tracer)
667 return -EINVAL;
668 832
669 return ds_write(tracer->ds.context, ds_pebs, record, size);
670} 833}
671 834
672static void ds_reset_or_clear(struct ds_context *context, 835void ds_resume_pebs(struct pebs_tracer *tracer)
673 enum ds_qualifier qual, int clear)
674{ 836{
675 unsigned long base, end;
676
677 base = ds_get(context->ds, qual, ds_buffer_base);
678 end = ds_get(context->ds, qual, ds_absolute_maximum);
679
680 if (clear)
681 memset((void *)base, 0, end - base);
682 837
683 ds_set(context->ds, qual, ds_index, base);
684} 838}
685 839
686int ds_reset_bts(struct bts_tracer *tracer) 840const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
687{ 841{
688 if (!tracer) 842 if (!tracer)
689 return -EINVAL; 843 return NULL;
690
691 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 0);
692 844
693 return 0; 845 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
846 return &tracer->trace;
694} 847}
695 848
696int ds_reset_pebs(struct pebs_tracer *tracer) 849const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
697{ 850{
698 if (!tracer) 851 if (!tracer)
699 return -EINVAL; 852 return NULL;
700 853
701 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 0); 854 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
855 tracer->trace.reset_value =
856 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
702 857
703 return 0; 858 return &tracer->trace;
704} 859}
705 860
706int ds_clear_bts(struct bts_tracer *tracer) 861int ds_reset_bts(struct bts_tracer *tracer)
707{ 862{
708 if (!tracer) 863 if (!tracer)
709 return -EINVAL; 864 return -EINVAL;
710 865
711 ds_reset_or_clear(tracer->ds.context, ds_bts, /* clear = */ 1); 866 tracer->trace.ds.top = tracer->trace.ds.begin;
712
713 return 0;
714}
715
716int ds_clear_pebs(struct pebs_tracer *tracer)
717{
718 if (!tracer)
719 return -EINVAL;
720 867
721 ds_reset_or_clear(tracer->ds.context, ds_pebs, /* clear = */ 1); 868 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
869 (unsigned long)tracer->trace.ds.top);
722 870
723 return 0; 871 return 0;
724} 872}
725 873
726int ds_get_pebs_reset(struct pebs_tracer *tracer, u64 *value) 874int ds_reset_pebs(struct pebs_tracer *tracer)
727{ 875{
728 if (!tracer) 876 if (!tracer)
729 return -EINVAL; 877 return -EINVAL;
730 878
731 if (!value) 879 tracer->trace.ds.top = tracer->trace.ds.begin;
732 return -EINVAL;
733 880
734 *value = *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); 881 ds_set(tracer->ds.context->ds, ds_bts, ds_index,
882 (unsigned long)tracer->trace.ds.top);
735 883
736 return 0; 884 return 0;
737} 885}
@@ -746,35 +894,59 @@ int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
746 return 0; 894 return 0;
747} 895}
748 896
749static const struct ds_configuration ds_cfg_var = { 897static const struct ds_configuration ds_cfg_netburst = {
750 .sizeof_ds = sizeof(long) * 12, 898 .name = "netburst",
751 .sizeof_field = sizeof(long), 899 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
752 .sizeof_rec[ds_bts] = sizeof(long) * 3, 900 .ctl[dsf_bts_kernel] = (1 << 5),
901 .ctl[dsf_bts_user] = (1 << 6),
902
903 .sizeof_field = sizeof(long),
904 .sizeof_rec[ds_bts] = sizeof(long) * 3,
753#ifdef __i386__ 905#ifdef __i386__
754 .sizeof_rec[ds_pebs] = sizeof(long) * 10 906 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
755#else 907#else
756 .sizeof_rec[ds_pebs] = sizeof(long) * 18 908 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
757#endif 909#endif
758}; 910};
759static const struct ds_configuration ds_cfg_64 = { 911static const struct ds_configuration ds_cfg_pentium_m = {
760 .sizeof_ds = 8 * 12, 912 .name = "pentium m",
761 .sizeof_field = 8, 913 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
762 .sizeof_rec[ds_bts] = 8 * 3, 914
915 .sizeof_field = sizeof(long),
916 .sizeof_rec[ds_bts] = sizeof(long) * 3,
763#ifdef __i386__ 917#ifdef __i386__
764 .sizeof_rec[ds_pebs] = 8 * 10 918 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
765#else 919#else
766 .sizeof_rec[ds_pebs] = 8 * 18 920 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
767#endif 921#endif
768}; 922};
923static const struct ds_configuration ds_cfg_core2 = {
924 .name = "core 2",
925 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
926 .ctl[dsf_bts_kernel] = (1 << 9),
927 .ctl[dsf_bts_user] = (1 << 10),
928
929 .sizeof_field = 8,
930 .sizeof_rec[ds_bts] = 8 * 3,
931 .sizeof_rec[ds_pebs] = 8 * 18,
932};
769 933
770static inline void 934static void
771ds_configure(const struct ds_configuration *cfg) 935ds_configure(const struct ds_configuration *cfg)
772{ 936{
937 memset(&ds_cfg, 0, sizeof(ds_cfg));
773 ds_cfg = *cfg; 938 ds_cfg = *cfg;
774 939
775 printk(KERN_INFO "DS available\n"); 940 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
941
942 if (!cpu_has_bts) {
943 ds_cfg.ctl[dsf_bts] = 0;
944 printk(KERN_INFO "[ds] bts not available\n");
945 }
946 if (!cpu_has_pebs)
947 printk(KERN_INFO "[ds] pebs not available\n");
776 948
777 WARN_ON_ONCE(MAX_SIZEOF_DS < ds_cfg.sizeof_ds); 949 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
778} 950}
779 951
780void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 952void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
@@ -787,10 +959,10 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
787 break; 959 break;
788 case 0xD: 960 case 0xD:
789 case 0xE: /* Pentium M */ 961 case 0xE: /* Pentium M */
790 ds_configure(&ds_cfg_var); 962 ds_configure(&ds_cfg_pentium_m);
791 break; 963 break;
792 default: /* Core2, Atom, ... */ 964 default: /* Core2, Atom, ... */
793 ds_configure(&ds_cfg_64); 965 ds_configure(&ds_cfg_core2);
794 break; 966 break;
795 } 967 }
796 break; 968 break;
@@ -799,7 +971,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
799 case 0x0: 971 case 0x0:
800 case 0x1: 972 case 0x1:
801 case 0x2: /* Netburst */ 973 case 0x2: /* Netburst */
802 ds_configure(&ds_cfg_var); 974 ds_configure(&ds_cfg_netburst);
803 break; 975 break;
804 default: 976 default:
805 /* sorry, don't know about them */ 977 /* sorry, don't know about them */
@@ -812,14 +984,41 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
812 } 984 }
813} 985}
814 986
815void ds_free(struct ds_context *context) 987/*
988 * Change the DS configuration from tracing prev to tracing next.
989 */
990void ds_switch_to(struct task_struct *prev, struct task_struct *next)
816{ 991{
817 /* This is called when the task owning the parameter context 992 struct ds_context *prev_ctx = prev->thread.ds_ctx;
818 * is dying. There should not be any user of that context left 993 struct ds_context *next_ctx = next->thread.ds_ctx;
819 * to disturb us, anymore. */ 994
820 unsigned long leftovers = context->count; 995 if (prev_ctx) {
821 while (leftovers--) { 996 update_debugctlmsr(0);
822 put_tracer(context->task); 997
823 ds_put_context(context); 998 if (prev_ctx->bts_master &&
999 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1000 struct bts_struct ts = {
1001 .qualifier = bts_task_departs,
1002 .variant.timestamp.jiffies = jiffies_64,
1003 .variant.timestamp.pid = prev->pid
1004 };
1005 bts_write(prev_ctx->bts_master, &ts);
1006 }
1007 }
1008
1009 if (next_ctx) {
1010 if (next_ctx->bts_master &&
1011 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1012 struct bts_struct ts = {
1013 .qualifier = bts_task_arrives,
1014 .variant.timestamp.jiffies = jiffies_64,
1015 .variant.timestamp.pid = next->pid
1016 };
1017 bts_write(next_ctx->bts_master, &ts);
1018 }
1019
1020 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
824 } 1021 }
1022
1023 update_debugctlmsr(next->thread.debugctlmsr);
825} 1024}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 24c2276aa453..605eff9a8ac0 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -252,11 +252,14 @@ void exit_thread(void)
252 put_cpu(); 252 put_cpu();
253 } 253 }
254#ifdef CONFIG_X86_DS 254#ifdef CONFIG_X86_DS
255 /* Free any DS contexts that have not been properly released. */ 255 /* Free any BTS tracers that have not been properly released. */
256 if (unlikely(current->thread.ds_ctx)) { 256 if (unlikely(current->bts)) {
257 /* we clear debugctl to make sure DS is not used. */ 257 ds_release_bts(current->bts);
258 update_debugctlmsr(0); 258 current->bts = NULL;
259 ds_free(current->thread.ds_ctx); 259
260 kfree(current->bts_buffer);
261 current->bts_buffer = NULL;
262 current->bts_size = 0;
260 } 263 }
261#endif /* CONFIG_X86_DS */ 264#endif /* CONFIG_X86_DS */
262} 265}
@@ -420,48 +423,19 @@ int set_tsc_mode(unsigned int val)
420 return 0; 423 return 0;
421} 424}
422 425
423#ifdef CONFIG_X86_DS
424static int update_debugctl(struct thread_struct *prev,
425 struct thread_struct *next, unsigned long debugctl)
426{
427 unsigned long ds_prev = 0;
428 unsigned long ds_next = 0;
429
430 if (prev->ds_ctx)
431 ds_prev = (unsigned long)prev->ds_ctx->ds;
432 if (next->ds_ctx)
433 ds_next = (unsigned long)next->ds_ctx->ds;
434
435 if (ds_next != ds_prev) {
436 /* we clear debugctl to make sure DS
437 * is not in use when we change it */
438 debugctl = 0;
439 update_debugctlmsr(0);
440 wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
441 }
442 return debugctl;
443}
444#else
445static int update_debugctl(struct thread_struct *prev,
446 struct thread_struct *next, unsigned long debugctl)
447{
448 return debugctl;
449}
450#endif /* CONFIG_X86_DS */
451
452static noinline void 426static noinline void
453__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, 427__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
454 struct tss_struct *tss) 428 struct tss_struct *tss)
455{ 429{
456 struct thread_struct *prev, *next; 430 struct thread_struct *prev, *next;
457 unsigned long debugctl;
458 431
459 prev = &prev_p->thread; 432 prev = &prev_p->thread;
460 next = &next_p->thread; 433 next = &next_p->thread;
461 434
462 debugctl = update_debugctl(prev, next, prev->debugctlmsr); 435 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
463 436 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
464 if (next->debugctlmsr != debugctl) 437 ds_switch_to(prev_p, next_p);
438 else if (next->debugctlmsr != prev->debugctlmsr)
465 update_debugctlmsr(next->debugctlmsr); 439 update_debugctlmsr(next->debugctlmsr);
466 440
467 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 441 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -483,15 +457,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
483 hard_enable_TSC(); 457 hard_enable_TSC();
484 } 458 }
485 459
486#ifdef CONFIG_X86_PTRACE_BTS
487 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
488 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
489
490 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
491 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
492#endif /* CONFIG_X86_PTRACE_BTS */
493
494
495 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { 460 if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
496 /* 461 /*
497 * Disable the bitmap via an invalid offset. We still cache 462 * Disable the bitmap via an invalid offset. We still cache
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index fbb321d53d34..1cfd2a4bf853 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -237,11 +237,14 @@ void exit_thread(void)
237 put_cpu(); 237 put_cpu();
238 } 238 }
239#ifdef CONFIG_X86_DS 239#ifdef CONFIG_X86_DS
240 /* Free any DS contexts that have not been properly released. */ 240 /* Free any BTS tracers that have not been properly released. */
241 if (unlikely(t->ds_ctx)) { 241 if (unlikely(current->bts)) {
242 /* we clear debugctl to make sure DS is not used. */ 242 ds_release_bts(current->bts);
243 update_debugctlmsr(0); 243 current->bts = NULL;
244 ds_free(t->ds_ctx); 244
245 kfree(current->bts_buffer);
246 current->bts_buffer = NULL;
247 current->bts_size = 0;
245 } 248 }
246#endif /* CONFIG_X86_DS */ 249#endif /* CONFIG_X86_DS */
247} 250}
@@ -471,35 +474,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
471 struct tss_struct *tss) 474 struct tss_struct *tss)
472{ 475{
473 struct thread_struct *prev, *next; 476 struct thread_struct *prev, *next;
474 unsigned long debugctl;
475 477
476 prev = &prev_p->thread, 478 prev = &prev_p->thread,
477 next = &next_p->thread; 479 next = &next_p->thread;
478 480
479 debugctl = prev->debugctlmsr; 481 if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
480 482 test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
481#ifdef CONFIG_X86_DS 483 ds_switch_to(prev_p, next_p);
482 { 484 else if (next->debugctlmsr != prev->debugctlmsr)
483 unsigned long ds_prev = 0, ds_next = 0;
484
485 if (prev->ds_ctx)
486 ds_prev = (unsigned long)prev->ds_ctx->ds;
487 if (next->ds_ctx)
488 ds_next = (unsigned long)next->ds_ctx->ds;
489
490 if (ds_next != ds_prev) {
491 /*
492 * We clear debugctl to make sure DS
493 * is not in use when we change it:
494 */
495 debugctl = 0;
496 update_debugctlmsr(0);
497 wrmsrl(MSR_IA32_DS_AREA, ds_next);
498 }
499 }
500#endif /* CONFIG_X86_DS */
501
502 if (next->debugctlmsr != debugctl)
503 update_debugctlmsr(next->debugctlmsr); 485 update_debugctlmsr(next->debugctlmsr);
504 486
505 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { 487 if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
@@ -534,14 +516,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p,
534 */ 516 */
535 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); 517 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
536 } 518 }
537
538#ifdef CONFIG_X86_PTRACE_BTS
539 if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
540 ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
541
542 if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
543 ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
544#endif /* CONFIG_X86_PTRACE_BTS */
545} 519}
546 520
547/* 521/*
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index b2998fe1166b..45e9855da2d2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -581,153 +581,73 @@ static int ioperm_get(struct task_struct *target,
581} 581}
582 582
583#ifdef CONFIG_X86_PTRACE_BTS 583#ifdef CONFIG_X86_PTRACE_BTS
584/*
585 * The configuration for a particular BTS hardware implementation.
586 */
587struct bts_configuration {
588 /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */
589 unsigned char sizeof_bts;
590 /* the size of a field in the BTS record in bytes */
591 unsigned char sizeof_field;
592 /* a bitmask to enable/disable BTS in DEBUGCTL MSR */
593 unsigned long debugctl_mask;
594};
595static struct bts_configuration bts_cfg;
596
597#define BTS_MAX_RECORD_SIZE (8 * 3)
598
599
600/*
601 * Branch Trace Store (BTS) uses the following format. Different
602 * architectures vary in the size of those fields.
603 * - source linear address
604 * - destination linear address
605 * - flags
606 *
607 * Later architectures use 64bit pointers throughout, whereas earlier
608 * architectures use 32bit pointers in 32bit mode.
609 *
610 * We compute the base address for the first 8 fields based on:
611 * - the field size stored in the DS configuration
612 * - the relative field position
613 *
614 * In order to store additional information in the BTS buffer, we use
615 * a special source address to indicate that the record requires
616 * special interpretation.
617 *
618 * Netburst indicated via a bit in the flags field whether the branch
619 * was predicted; this is ignored.
620 */
621
622enum bts_field {
623 bts_from = 0,
624 bts_to,
625 bts_flags,
626
627 bts_escape = (unsigned long)-1,
628 bts_qual = bts_to,
629 bts_jiffies = bts_flags
630};
631
632static inline unsigned long bts_get(const char *base, enum bts_field field)
633{
634 base += (bts_cfg.sizeof_field * field);
635 return *(unsigned long *)base;
636}
637
638static inline void bts_set(char *base, enum bts_field field, unsigned long val)
639{
640 base += (bts_cfg.sizeof_field * field);;
641 (*(unsigned long *)base) = val;
642}
643
644/*
645 * Translate a BTS record from the raw format into the bts_struct format
646 *
647 * out (out): bts_struct interpretation
648 * raw: raw BTS record
649 */
650static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw)
651{
652 memset(out, 0, sizeof(*out));
653 if (bts_get(raw, bts_from) == bts_escape) {
654 out->qualifier = bts_get(raw, bts_qual);
655 out->variant.jiffies = bts_get(raw, bts_jiffies);
656 } else {
657 out->qualifier = BTS_BRANCH;
658 out->variant.lbr.from_ip = bts_get(raw, bts_from);
659 out->variant.lbr.to_ip = bts_get(raw, bts_to);
660 }
661}
662
663static int ptrace_bts_read_record(struct task_struct *child, size_t index, 584static int ptrace_bts_read_record(struct task_struct *child, size_t index,
664 struct bts_struct __user *out) 585 struct bts_struct __user *out)
665{ 586{
666 struct bts_struct ret; 587 const struct bts_trace *trace;
667 const void *bts_record; 588 struct bts_struct bts;
668 size_t bts_index, bts_end; 589 const unsigned char *at;
669 int error; 590 int error;
670 591
671 error = ds_get_bts_end(child->bts, &bts_end); 592 trace = ds_read_bts(child->bts);
672 if (error < 0) 593 if (!trace)
673 return error; 594 return -EPERM;
674
675 if (bts_end <= index)
676 return -EINVAL;
677 595
678 error = ds_get_bts_index(child->bts, &bts_index); 596 at = trace->ds.top - ((index + 1) * trace->ds.size);
679 if (error < 0) 597 if ((void *)at < trace->ds.begin)
680 return error; 598 at += (trace->ds.n * trace->ds.size);
681 599
682 /* translate the ptrace bts index into the ds bts index */ 600 if (!trace->read)
683 bts_index += bts_end - (index + 1); 601 return -EOPNOTSUPP;
684 if (bts_end <= bts_index)
685 bts_index -= bts_end;
686 602
687 error = ds_access_bts(child->bts, bts_index, &bts_record); 603 error = trace->read(child->bts, at, &bts);
688 if (error < 0) 604 if (error < 0)
689 return error; 605 return error;
690 606
691 ptrace_bts_translate_record(&ret, bts_record); 607 if (copy_to_user(out, &bts, sizeof(bts)))
692
693 if (copy_to_user(out, &ret, sizeof(ret)))
694 return -EFAULT; 608 return -EFAULT;
695 609
696 return sizeof(ret); 610 return sizeof(bts);
697} 611}
698 612
699static int ptrace_bts_drain(struct task_struct *child, 613static int ptrace_bts_drain(struct task_struct *child,
700 long size, 614 long size,
701 struct bts_struct __user *out) 615 struct bts_struct __user *out)
702{ 616{
703 struct bts_struct ret; 617 const struct bts_trace *trace;
704 const unsigned char *raw; 618 const unsigned char *at;
705 size_t end, i; 619 int error, drained = 0;
706 int error;
707 620
708 error = ds_get_bts_index(child->bts, &end); 621 trace = ds_read_bts(child->bts);
709 if (error < 0) 622 if (!trace)
710 return error; 623 return -EPERM;
711 624
712 if (size < (end * sizeof(struct bts_struct))) 625 if (!trace->read)
626 return -EOPNOTSUPP;
627
628 if (size < (trace->ds.top - trace->ds.begin))
713 return -EIO; 629 return -EIO;
714 630
715 error = ds_access_bts(child->bts, 0, (const void **)&raw); 631 for (at = trace->ds.begin; (void *)at < trace->ds.top;
716 if (error < 0) 632 out++, drained++, at += trace->ds.size) {
717 return error; 633 struct bts_struct bts;
634 int error;
718 635
719 for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { 636 error = trace->read(child->bts, at, &bts);
720 ptrace_bts_translate_record(&ret, raw); 637 if (error < 0)
638 return error;
721 639
722 if (copy_to_user(out, &ret, sizeof(ret))) 640 if (copy_to_user(out, &bts, sizeof(bts)))
723 return -EFAULT; 641 return -EFAULT;
724 } 642 }
725 643
726 error = ds_clear_bts(child->bts); 644 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
645
646 error = ds_reset_bts(child->bts);
727 if (error < 0) 647 if (error < 0)
728 return error; 648 return error;
729 649
730 return end; 650 return drained;
731} 651}
732 652
733static int ptrace_bts_config(struct task_struct *child, 653static int ptrace_bts_config(struct task_struct *child,
@@ -735,136 +655,89 @@ static int ptrace_bts_config(struct task_struct *child,
735 const struct ptrace_bts_config __user *ucfg) 655 const struct ptrace_bts_config __user *ucfg)
736{ 656{
737 struct ptrace_bts_config cfg; 657 struct ptrace_bts_config cfg;
738 int error = 0; 658 unsigned int flags = 0;
739
740 error = -EOPNOTSUPP;
741 if (!bts_cfg.sizeof_bts)
742 goto errout;
743 659
744 error = -EIO;
745 if (cfg_size < sizeof(cfg)) 660 if (cfg_size < sizeof(cfg))
746 goto errout; 661 return -EIO;
747 662
748 error = -EFAULT;
749 if (copy_from_user(&cfg, ucfg, sizeof(cfg))) 663 if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
750 goto errout; 664 return -EFAULT;
751
752 error = -EINVAL;
753 if ((cfg.flags & PTRACE_BTS_O_SIGNAL) &&
754 !(cfg.flags & PTRACE_BTS_O_ALLOC))
755 goto errout;
756
757 if (cfg.flags & PTRACE_BTS_O_ALLOC) {
758 bts_ovfl_callback_t ovfl = NULL;
759 unsigned int sig = 0;
760
761 error = -EINVAL;
762 if (cfg.size < (10 * bts_cfg.sizeof_bts))
763 goto errout;
764 665
765 if (cfg.flags & PTRACE_BTS_O_SIGNAL) { 666 if (child->bts) {
766 if (!cfg.signal) 667 ds_release_bts(child->bts);
767 goto errout; 668 child->bts = NULL;
669 }
768 670
769 error = -EOPNOTSUPP; 671 if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
770 goto errout; 672 if (!cfg.signal)
673 return -EINVAL;
771 674
772 sig = cfg.signal; 675 return -EOPNOTSUPP;
773 }
774 676
775 if (child->bts) { 677 child->thread.bts_ovfl_signal = cfg.signal;
776 (void)ds_release_bts(child->bts); 678 }
777 kfree(child->bts_buffer);
778 679
779 child->bts = NULL; 680 if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
780 child->bts_buffer = NULL; 681 (cfg.size != child->bts_size)) {
781 } 682 kfree(child->bts_buffer);
782 683
783 error = -ENOMEM; 684 child->bts_size = cfg.size;
784 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); 685 child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL);
785 if (!child->bts_buffer) 686 if (!child->bts_buffer) {
786 goto errout; 687 child->bts_size = 0;
787 688 return -ENOMEM;
788 child->bts = ds_request_bts(child, child->bts_buffer, cfg.size,
789 ovfl, /* th = */ (size_t)-1);
790 if (IS_ERR(child->bts)) {
791 error = PTR_ERR(child->bts);
792 kfree(child->bts_buffer);
793 child->bts = NULL;
794 child->bts_buffer = NULL;
795 goto errout;
796 } 689 }
797
798 child->thread.bts_ovfl_signal = sig;
799 } 690 }
800 691
801 error = -EINVAL;
802 if (!child->thread.ds_ctx && cfg.flags)
803 goto errout;
804
805 if (cfg.flags & PTRACE_BTS_O_TRACE) 692 if (cfg.flags & PTRACE_BTS_O_TRACE)
806 child->thread.debugctlmsr |= bts_cfg.debugctl_mask; 693 flags |= BTS_USER;
807 else
808 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
809 694
810 if (cfg.flags & PTRACE_BTS_O_SCHED) 695 if (cfg.flags & PTRACE_BTS_O_SCHED)
811 set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); 696 flags |= BTS_TIMESTAMPS;
812 else
813 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
814 697
815 error = sizeof(cfg); 698 child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
699 /* ovfl = */ NULL, /* th = */ (size_t)-1,
700 flags);
701 if (IS_ERR(child->bts)) {
702 int error = PTR_ERR(child->bts);
816 703
817out: 704 kfree(child->bts_buffer);
818 if (child->thread.debugctlmsr) 705 child->bts = NULL;
819 set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); 706 child->bts_buffer = NULL;
820 else 707 child->bts_size = 0;
821 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
822 708
823 return error; 709 return error;
710 }
824 711
825errout: 712 return sizeof(cfg);
826 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
827 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
828 goto out;
829} 713}
830 714
831static int ptrace_bts_status(struct task_struct *child, 715static int ptrace_bts_status(struct task_struct *child,
832 long cfg_size, 716 long cfg_size,
833 struct ptrace_bts_config __user *ucfg) 717 struct ptrace_bts_config __user *ucfg)
834{ 718{
719 const struct bts_trace *trace;
835 struct ptrace_bts_config cfg; 720 struct ptrace_bts_config cfg;
836 size_t end;
837 const void *base, *max;
838 int error;
839 721
840 if (cfg_size < sizeof(cfg)) 722 if (cfg_size < sizeof(cfg))
841 return -EIO; 723 return -EIO;
842 724
843 error = ds_get_bts_end(child->bts, &end); 725 trace = ds_read_bts(child->bts);
844 if (error < 0) 726 if (!trace)
845 return error; 727 return -EPERM;
846
847 error = ds_access_bts(child->bts, /* index = */ 0, &base);
848 if (error < 0)
849 return error;
850
851 error = ds_access_bts(child->bts, /* index = */ end, &max);
852 if (error < 0)
853 return error;
854 728
855 memset(&cfg, 0, sizeof(cfg)); 729 memset(&cfg, 0, sizeof(cfg));
856 cfg.size = (max - base); 730 cfg.size = trace->ds.end - trace->ds.begin;
857 cfg.signal = child->thread.bts_ovfl_signal; 731 cfg.signal = child->thread.bts_ovfl_signal;
858 cfg.bts_size = sizeof(struct bts_struct); 732 cfg.bts_size = sizeof(struct bts_struct);
859 733
860 if (cfg.signal) 734 if (cfg.signal)
861 cfg.flags |= PTRACE_BTS_O_SIGNAL; 735 cfg.flags |= PTRACE_BTS_O_SIGNAL;
862 736
863 if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && 737 if (trace->ds.flags & BTS_USER)
864 child->thread.debugctlmsr & bts_cfg.debugctl_mask)
865 cfg.flags |= PTRACE_BTS_O_TRACE; 738 cfg.flags |= PTRACE_BTS_O_TRACE;
866 739
867 if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) 740 if (trace->ds.flags & BTS_TIMESTAMPS)
868 cfg.flags |= PTRACE_BTS_O_SCHED; 741 cfg.flags |= PTRACE_BTS_O_SCHED;
869 742
870 if (copy_to_user(ucfg, &cfg, sizeof(cfg))) 743 if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
@@ -873,105 +746,28 @@ static int ptrace_bts_status(struct task_struct *child,
873 return sizeof(cfg); 746 return sizeof(cfg);
874} 747}
875 748
876static int ptrace_bts_write_record(struct task_struct *child, 749static int ptrace_bts_clear(struct task_struct *child)
877 const struct bts_struct *in)
878{ 750{
879 unsigned char bts_record[BTS_MAX_RECORD_SIZE]; 751 const struct bts_trace *trace;
880 752
881 if (BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts) 753 trace = ds_read_bts(child->bts);
882 return -EOVERFLOW; 754 if (!trace)
755 return -EPERM;
883 756
884 memset(bts_record, 0, bts_cfg.sizeof_bts); 757 memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
885 switch (in->qualifier) {
886 case BTS_INVALID:
887 break;
888 758
889 case BTS_BRANCH: 759 return ds_reset_bts(child->bts);
890 bts_set(bts_record, bts_from, in->variant.lbr.from_ip);
891 bts_set(bts_record, bts_to, in->variant.lbr.to_ip);
892 break;
893
894 case BTS_TASK_ARRIVES:
895 case BTS_TASK_DEPARTS:
896 bts_set(bts_record, bts_from, bts_escape);
897 bts_set(bts_record, bts_qual, in->qualifier);
898 bts_set(bts_record, bts_jiffies, in->variant.jiffies);
899 break;
900
901 default:
902 return -EINVAL;
903 }
904
905 return ds_write_bts(child->bts, bts_record, bts_cfg.sizeof_bts);
906} 760}
907 761
908void ptrace_bts_take_timestamp(struct task_struct *tsk, 762static int ptrace_bts_size(struct task_struct *child)
909 enum bts_qualifier qualifier)
910{ 763{
911 struct bts_struct rec = { 764 const struct bts_trace *trace;
912 .qualifier = qualifier,
913 .variant.jiffies = jiffies_64
914 };
915
916 ptrace_bts_write_record(tsk, &rec);
917}
918
919static const struct bts_configuration bts_cfg_netburst = {
920 .sizeof_bts = sizeof(long) * 3,
921 .sizeof_field = sizeof(long),
922 .debugctl_mask = (1<<2)|(1<<3)|(1<<5)
923};
924 765
925static const struct bts_configuration bts_cfg_pentium_m = { 766 trace = ds_read_bts(child->bts);
926 .sizeof_bts = sizeof(long) * 3, 767 if (!trace)
927 .sizeof_field = sizeof(long), 768 return -EPERM;
928 .debugctl_mask = (1<<6)|(1<<7)
929};
930 769
931static const struct bts_configuration bts_cfg_core2 = { 770 return (trace->ds.top - trace->ds.begin) / trace->ds.size;
932 .sizeof_bts = 8 * 3,
933 .sizeof_field = 8,
934 .debugctl_mask = (1<<6)|(1<<7)|(1<<9)
935};
936
937static inline void bts_configure(const struct bts_configuration *cfg)
938{
939 bts_cfg = *cfg;
940}
941
942void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c)
943{
944 switch (c->x86) {
945 case 0x6:
946 switch (c->x86_model) {
947 case 0 ... 0xC:
948 /* sorry, don't know about them */
949 break;
950 case 0xD:
951 case 0xE: /* Pentium M */
952 bts_configure(&bts_cfg_pentium_m);
953 break;
954 default: /* Core2, Atom, ... */
955 bts_configure(&bts_cfg_core2);
956 break;
957 }
958 break;
959 case 0xF:
960 switch (c->x86_model) {
961 case 0x0:
962 case 0x1:
963 case 0x2: /* Netburst */
964 bts_configure(&bts_cfg_netburst);
965 break;
966 default:
967 /* sorry, don't know about them */
968 break;
969 }
970 break;
971 default:
972 /* sorry, don't know about them */
973 break;
974 }
975} 771}
976#endif /* CONFIG_X86_PTRACE_BTS */ 772#endif /* CONFIG_X86_PTRACE_BTS */
977 773
@@ -988,15 +784,12 @@ void ptrace_disable(struct task_struct *child)
988#endif 784#endif
989#ifdef CONFIG_X86_PTRACE_BTS 785#ifdef CONFIG_X86_PTRACE_BTS
990 if (child->bts) { 786 if (child->bts) {
991 (void)ds_release_bts(child->bts); 787 ds_release_bts(child->bts);
788 child->bts = NULL;
789
992 kfree(child->bts_buffer); 790 kfree(child->bts_buffer);
993 child->bts_buffer = NULL; 791 child->bts_buffer = NULL;
994 792 child->bts_size = 0;
995 child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask;
996 if (!child->thread.debugctlmsr)
997 clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
998
999 clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS);
1000 } 793 }
1001#endif /* CONFIG_X86_PTRACE_BTS */ 794#endif /* CONFIG_X86_PTRACE_BTS */
1002} 795}
@@ -1129,16 +922,9 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1129 (child, data, (struct ptrace_bts_config __user *)addr); 922 (child, data, (struct ptrace_bts_config __user *)addr);
1130 break; 923 break;
1131 924
1132 case PTRACE_BTS_SIZE: { 925 case PTRACE_BTS_SIZE:
1133 size_t size; 926 ret = ptrace_bts_size(child);
1134
1135 ret = ds_get_bts_index(child->bts, &size);
1136 if (ret == 0) {
1137 WARN_ON_ONCE(size != (int) size);
1138 ret = (int) size;
1139 }
1140 break; 927 break;
1141 }
1142 928
1143 case PTRACE_BTS_GET: 929 case PTRACE_BTS_GET:
1144 ret = ptrace_bts_read_record 930 ret = ptrace_bts_read_record
@@ -1146,7 +932,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
1146 break; 932 break;
1147 933
1148 case PTRACE_BTS_CLEAR: 934 case PTRACE_BTS_CLEAR:
1149 ret = ds_clear_bts(child->bts); 935 ret = ptrace_bts_clear(child);
1150 break; 936 break;
1151 937
1152 case PTRACE_BTS_DRAIN: 938 case PTRACE_BTS_DRAIN:
@@ -1409,6 +1195,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
1409 1195
1410 case PTRACE_GET_THREAD_AREA: 1196 case PTRACE_GET_THREAD_AREA:
1411 case PTRACE_SET_THREAD_AREA: 1197 case PTRACE_SET_THREAD_AREA:
1198#ifdef CONFIG_X86_PTRACE_BTS
1199 case PTRACE_BTS_CONFIG:
1200 case PTRACE_BTS_STATUS:
1201 case PTRACE_BTS_SIZE:
1202 case PTRACE_BTS_GET:
1203 case PTRACE_BTS_CLEAR:
1204 case PTRACE_BTS_DRAIN:
1205#endif /* CONFIG_X86_PTRACE_BTS */
1412 return arch_ptrace(child, request, addr, data); 1206 return arch_ptrace(child, request, addr, data);
1413 1207
1414 default: 1208 default: