Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r-- | arch/x86/kernel/ds.c | 1147 |
1 files changed, 644 insertions, 503 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index a2d1176c38ee..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,14 +6,13 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
@@ -28,22 +27,69 @@ | |||
28 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
29 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
30 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | ||
31 | 31 | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * The configuration for a particular DS hardware implementation. | 34 | * The configuration for a particular DS hardware implementation. |
35 | */ | 35 | */ |
36 | struct ds_configuration { | 36 | struct ds_configuration { |
37 | /* the size of the DS structure in bytes */ | 37 | /* the name of the configuration */ |
38 | unsigned char sizeof_ds; | 38 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure in bytes; | 39 | /* the size of one pointer-typed field in the DS structure and |
40 | this covers the first 8 fields related to buffer management. */ | 40 | in the BTS and PEBS buffers in bytes; |
41 | this covers the first 8 DS fields related to buffer management. */ | ||
41 | unsigned char sizeof_field; | 42 | unsigned char sizeof_field; |
42 | /* the size of a BTS/PEBS record in bytes */ | 43 | /* the size of a BTS/PEBS record in bytes */ |
43 | unsigned char sizeof_rec[2]; | 44 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | ||
46 | * by enum ds_feature */ | ||
47 | unsigned long ctl[dsf_ctl_max]; | ||
44 | }; | 48 | }; |
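The sizeof_field member parameterizes the whole DS layout: the buffer base, index, absolute maximum, and interrupt threshold occupy four consecutive pointer-typed slots per qualifier, BTS first, then PEBS. A minimal sketch of how an accessor would locate such a field under that assumption (ds_field is a hypothetical name; the file's real accessors are ds_get()/ds_set()):

	/* Sketch (hypothetical helper, not in this patch): locating one
	 * pointer-typed DS field. qual selects the group of four
	 * buffer-management fields, 0 for BTS, 1 for PEBS. */
	static unsigned long ds_field(unsigned char *ds, unsigned int qual,
				      unsigned int field)
	{
		return *(unsigned long *)(ds +
			(ds_cfg.sizeof_field * (field + (4 * qual))));
	}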
45 | static struct ds_configuration ds_cfg; | 49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
46 | 50 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | ||
52 | |||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | ||
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | ||
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | |||
57 | #define BTS_CONTROL \ | ||
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | ||
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | |||
61 | |||
62 | /* | ||
63 | * A BTS or PEBS tracer. | ||
64 | * | ||
65 | * This holds the configuration of the tracer and serves as a handle | ||
66 | * to identify tracers. | ||
67 | */ | ||
68 | struct ds_tracer { | ||
69 | /* the DS context (partially) owned by this tracer */ | ||
70 | struct ds_context *context; | ||
71 | /* the buffer provided on ds_request() and its size in bytes */ | ||
72 | void *buffer; | ||
73 | size_t size; | ||
74 | }; | ||
75 | |||
76 | struct bts_tracer { | ||
77 | /* the common DS part */ | ||
78 | struct ds_tracer ds; | ||
79 | /* the trace including the DS configuration */ | ||
80 | struct bts_trace trace; | ||
81 | /* buffer overflow notification function */ | ||
82 | bts_ovfl_callback_t ovfl; | ||
83 | }; | ||
84 | |||
85 | struct pebs_tracer { | ||
86 | /* the common DS part */ | ||
87 | struct ds_tracer ds; | ||
88 | /* the trace including the DS configuration */ | ||
89 | struct pebs_trace trace; | ||
90 | /* buffer overflow notification function */ | ||
91 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | ||
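With these handles, a typical in-kernel client would look roughly like the following. This is a hedged sketch: the buffer size and flags are invented for illustration, and since this patch moves buffer allocation out of the DS layer, the caller owns (and accounts for) the memory.

	/* Sketch: requesting, reading, and releasing a BTS tracer. */
	static void *bts_buffer;	/* caller-allocated, caller-accounted */
	static struct bts_tracer *tracer;

	static int example_start_bts(struct task_struct *task)
	{
		bts_buffer = kzalloc(PAGE_SIZE, GFP_KERNEL);
		if (!bts_buffer)
			return -ENOMEM;

		/* no overflow callback, no interrupt threshold */
		tracer = ds_request_bts(task, bts_buffer, PAGE_SIZE,
					NULL, (size_t)-1,
					BTS_KERNEL | BTS_USER);
		if (IS_ERR(tracer)) {
			kfree(bts_buffer);
			return PTR_ERR(tracer);
		}
		return 0;
	}

	static void example_stop_bts(void)
	{
		const struct bts_trace *trace = ds_read_bts(tracer);
		/* ... walk trace->ds.begin .. trace->ds.top via trace->read ... */
		ds_release_bts(tracer);
		kfree(bts_buffer);	/* buffer stays with the caller */
	}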
47 | 93 | ||
48 | /* | 94 | /* |
49 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | 95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 |
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
109 | 155 | ||
110 | 156 | ||
111 | /* | 157 | /* |
112 | * Locking is done only for allocating BTS or PEBS resources and for | 158 | * Locking is done only for allocating BTS or PEBS resources. |
113 | * guarding context and buffer memory allocation. | ||
114 | * | ||
115 | * Most functions require the current task to own the ds context part | ||
116 | * they are going to access. All the locking is done when validating | ||
117 | * access to the context. | ||
118 | */ | 159 | */ |
119 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); | 160 | static DEFINE_SPINLOCK(ds_lock); |
120 | |||
121 | /* | ||
122 | * Validate that the current task is allowed to access the BTS/PEBS | ||
123 | * buffer of the parameter task. | ||
124 | * | ||
125 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
126 | */ | ||
127 | static inline int ds_validate_access(struct ds_context *context, | ||
128 | enum ds_qualifier qual) | ||
129 | { | ||
130 | if (!context) | ||
131 | return -EPERM; | ||
132 | |||
133 | if (context->owner[qual] == current) | ||
134 | return 0; | ||
135 | |||
136 | return -EPERM; | ||
137 | } | ||
138 | 161 | ||
139 | 162 | ||
140 | /* | 163 | /* |
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context, | |||
150 | * >0 number of per-thread tracers | 173 | * >0 number of per-thread tracers |
151 | * <0 number of per-cpu tracers | 174 | * <0 number of per-cpu tracers |
152 | * | 175 | * |
153 | * The below functions to get and put tracers and to check the | ||
154 | * allocation type require the ds_lock to be held by the caller. | ||
155 | * | ||
156 | * Tracers essentially give the number of ds contexts for a certain | 176 | * Tracers essentially give the number of ds contexts for a certain |
157 | * type of allocation. | 177 | * type of allocation. |
158 | */ | 178 | */ |
159 | static long tracers; | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
160 | 180 | ||
161 | static inline void get_tracer(struct task_struct *task) | 181 | static inline void get_tracer(struct task_struct *task) |
162 | { | 182 | { |
163 | tracers += (task ? 1 : -1); | 183 | if (task) |
184 | atomic_inc(&tracers); | ||
185 | else | ||
186 | atomic_dec(&tracers); | ||
164 | } | 187 | } |
165 | 188 | ||
166 | static inline void put_tracer(struct task_struct *task) | 189 | static inline void put_tracer(struct task_struct *task) |
167 | { | 190 | { |
168 | tracers -= (task ? 1 : -1); | 191 | if (task) |
192 | atomic_dec(&tracers); | ||
193 | else | ||
194 | atomic_inc(&tracers); | ||
169 | } | 195 | } |
170 | 196 | ||
171 | static inline int check_tracer(struct task_struct *task) | 197 | static inline int check_tracer(struct task_struct *task) |
172 | { | 198 | { |
173 | return (task ? (tracers >= 0) : (tracers <= 0)); | 199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
174 | } | 202 | } |
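The single counter thus encodes the allocation type in its sign: per-thread requests push it positive, per-cpu requests push it negative, and check_tracer() rejects a request whose sign would conflict. A sketch of the failing case:

	/* Two per-thread tracers are active: tracers == +2.
	 * A subsequent per-cpu request runs check_tracer(NULL), which
	 * evaluates (atomic_read(&tracers) <= 0) -> false, so
	 * ds_request_bts()/ds_request_pebs() below fail with -EPERM;
	 * per-thread and per-cpu tracing cannot be mixed system-wide.
	 */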
175 | 203 | ||
176 | 204 | ||
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task) | |||
183 | * | 211 | * |
184 | * Contexts are use-counted. They are allocated on first access and | 212 | * Contexts are use-counted. They are allocated on first access and |
185 | * deallocated when the last user puts the context. | 213 | * deallocated when the last user puts the context. |
186 | * | ||
187 | * We distinguish between an allocating and a non-allocating get of a | ||
188 | * context: | ||
189 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
190 | * requires the caller to hold the global ds_lock. | ||
191 | * - the non-allocating get is used for all other cases. A | ||
192 | * non-existing context indicates an error. It acquires and releases | ||
193 | * the ds_lock itself for obtaining the context. | ||
194 | * | ||
195 | * A context and its DS configuration are allocated and deallocated | ||
196 | * together. A context always has a DS configuration of the | ||
197 | * appropriate size. | ||
198 | */ | ||
199 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
200 | |||
201 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
202 | |||
203 | /* | ||
204 | * Returns the pointer to the parameter task's context or to the | ||
205 | * system-wide context, if task is NULL. | ||
206 | * | ||
207 | * Increases the use count of the returned context, if not NULL. | ||
208 | */ | 214 | */ |
209 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 215 | struct ds_context { |
210 | { | 216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ |
211 | struct ds_context *context; | 217 | unsigned char ds[MAX_SIZEOF_DS]; |
212 | unsigned long irq; | 218 | /* the owner of the BTS and PEBS configuration, respectively */ |
219 | struct bts_tracer *bts_master; | ||
220 | struct pebs_tracer *pebs_master; | ||
221 | /* use count */ | ||
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
213 | 230 | ||
214 | spin_lock_irqsave(&ds_lock, irq); | 231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); |
215 | 232 | ||
216 | context = (task ? task->thread.ds_ctx : this_system_context); | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
217 | if (context) | ||
218 | context->count++; | ||
219 | 234 | ||
220 | spin_unlock_irqrestore(&ds_lock, irq); | ||
221 | |||
222 | return context; | ||
223 | } | ||
224 | 235 | ||
225 | /* | 236 | static inline struct ds_context *ds_get_context(struct task_struct *task) |
226 | * Same as ds_get_context, but allocates the context and its DS | ||
227 | * structure, if necessary; returns NULL, if out of memory. | ||
228 | */ | ||
229 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
230 | { | 237 | { |
231 | struct ds_context **p_context = | 238 | struct ds_context **p_context = |
232 | (task ? &task->thread.ds_ctx : &this_system_context); | 239 | (task ? &task->thread.ds_ctx : &system_context); |
233 | struct ds_context *context = *p_context; | 240 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | ||
234 | unsigned long irq; | 242 | unsigned long irq; |
235 | 243 | ||
236 | if (!context) { | 244 | /* Chances are small that we already have a context. */ |
237 | context = kzalloc(sizeof(*context), GFP_KERNEL); | 245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
238 | if (!context) | 246 | if (!new_context) |
239 | return NULL; | 247 | return NULL; |
240 | |||
241 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
242 | if (!context->ds) { | ||
243 | kfree(context); | ||
244 | return NULL; | ||
245 | } | ||
246 | 248 | ||
247 | spin_lock_irqsave(&ds_lock, irq); | 249 | spin_lock_irqsave(&ds_lock, irq); |
248 | 250 | ||
249 | if (*p_context) { | 251 | context = *p_context; |
250 | kfree(context->ds); | 252 | if (!context) { |
251 | kfree(context); | 253 | context = new_context; |
252 | 254 | ||
253 | context = *p_context; | 255 | context->this = p_context; |
254 | } else { | 256 | context->task = task; |
255 | *p_context = context; | 257 | context->count = 0; |
256 | 258 | ||
257 | context->this = p_context; | 259 | if (task) |
258 | context->task = task; | 260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); |
259 | 261 | ||
260 | if (task) | 262 | if (!task || (task == current)) |
261 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | 263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); |
262 | 264 | ||
263 | if (!task || (task == current)) | 265 | *p_context = context; |
264 | wrmsrl(MSR_IA32_DS_AREA, | ||
265 | (unsigned long)context->ds); | ||
266 | } | ||
267 | spin_unlock_irqrestore(&ds_lock, irq); | ||
268 | } | 266 | } |
269 | 267 | ||
270 | context->count++; | 268 | context->count++; |
271 | 269 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | ||
271 | |||
272 | if (context != new_context) | ||
273 | kfree(new_context); | ||
274 | |||
272 | return context; | 275 | return context; |
273 | } | 276 | } |
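The allocation above follows the usual optimistic pattern for GFP_KERNEL allocations that must be installed under a spinlock: allocate before taking the lock, install only if nobody raced us, and free the spare copy afterwards. Stripped of the DS specifics, the shape is (a sketch; struct obj and its count member are hypothetical):

	/* Sketch of the allocate-outside-the-lock pattern used by
	 * ds_get_context(): kzalloc(GFP_KERNEL) may sleep, so it cannot
	 * happen under the spinlock; the loser of an allocation race
	 * frees its spare copy. */
	static struct obj *obj_get(struct obj **slot, spinlock_t *lock)
	{
		struct obj *o, *new = kzalloc(sizeof(*new), GFP_KERNEL);
		unsigned long irq;

		if (!new)
			return NULL;

		spin_lock_irqsave(lock, irq);
		o = *slot;
		if (!o)
			*slot = o = new;	/* won the race: install ours */
		o->count++;
		spin_unlock_irqrestore(lock, irq);

		if (o != new)			/* lost the race: drop the spare */
			kfree(new);
		return o;
	}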
274 | 277 | ||
275 | /* | ||
276 | * Decreases the use count of the parameter context, if not NULL. | ||
277 | * Deallocates the context, if the use count reaches zero. | ||
278 | */ | ||
279 | static inline void ds_put_context(struct ds_context *context) | 278 | static inline void ds_put_context(struct ds_context *context) |
280 | { | 279 | { |
281 | unsigned long irq; | 280 | unsigned long irq; |
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context) | |||
285 | 284 | ||
286 | spin_lock_irqsave(&ds_lock, irq); | 285 | spin_lock_irqsave(&ds_lock, irq); |
287 | 286 | ||
288 | if (--context->count) | 287 | if (--context->count) { |
289 | goto out; | 288 | spin_unlock_irqrestore(&ds_lock, irq); |
289 | return; | ||
290 | } | ||
290 | 291 | ||
291 | *(context->this) = NULL; | 292 | *(context->this) = NULL; |
292 | 293 | ||
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context) | |||
296 | if (!context->task || (context->task == current)) | 297 | if (!context->task || (context->task == current)) |
297 | wrmsrl(MSR_IA32_DS_AREA, 0); | 298 | wrmsrl(MSR_IA32_DS_AREA, 0); |
298 | 299 | ||
299 | put_tracer(context->task); | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
300 | 301 | ||
301 | /* free any leftover buffers from tracers that did not | ||
302 | * deallocate them properly. */ | ||
303 | kfree(context->buffer[ds_bts]); | ||
304 | kfree(context->buffer[ds_pebs]); | ||
305 | kfree(context->ds); | ||
306 | kfree(context); | 302 | kfree(context); |
307 | out: | ||
308 | spin_unlock_irqrestore(&ds_lock, irq); | ||
309 | } | 303 | } |
310 | 304 | ||
311 | 305 | ||
312 | /* | 306 | /* |
313 | * Handle a buffer overflow | 307 | * Call the tracer's callback on a buffer overflow. |
314 | * | 308 | * |
315 | * task: the task whose buffers are overflowing; | ||
316 | * NULL for a buffer overflow on the current cpu | ||
317 | * context: the ds context | 309 | * context: the ds context |
318 | * qual: the buffer type | 310 | * qual: the buffer type |
319 | */ | 311 | */ |
320 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | 312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
321 | enum ds_qualifier qual) | ||
322 | { | 313 | { |
323 | if (!context) | 314 | switch (qual) { |
324 | return; | 315 | case ds_bts: |
325 | 316 | if (context->bts_master && | |
326 | if (context->callback[qual]) | 317 | context->bts_master->ovfl) |
327 | (*context->callback[qual])(task); | 318 | context->bts_master->ovfl(context->bts_master); |
328 | 319 | break; | |
329 | /* todo: do some more overflow handling */ | 320 | case ds_pebs: |
321 | if (context->pebs_master && | ||
322 | context->pebs_master->ovfl) | ||
323 | context->pebs_master->ovfl(context->pebs_master); | ||
324 | break; | ||
325 | } | ||
330 | } | 326 | } |
331 | 327 | ||
332 | 328 | ||
333 | /* | 329 | /* |
334 | * Allocate a non-pageable buffer of the parameter size. | 330 | * Write raw data into the BTS or PEBS buffer. |
335 | * Checks the memory and the locked memory rlimit. | ||
336 | * | 331 | * |
337 | * Returns the buffer, if successful; | 332 | * The remainder of any partially written record is zeroed out. |
338 | * NULL, if out of memory or rlimit exceeded. | ||
339 | * | 333 | * |
340 | * size: the requested buffer size in bytes | 334 | * context: the DS context |
341 | * pages (out): if not NULL, contains the number of pages reserved | 335 | * qual: the buffer type |
336 | * record: the data to write | ||
337 | * size: the size of the data | ||
342 | */ | 338 | */ |
343 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | 339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | ||
344 | { | 341 | { |
345 | unsigned long rlim, vm, pgsz; | 342 | int bytes_written = 0; |
346 | void *buffer; | ||
347 | 343 | ||
348 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 344 | if (!record) |
345 | return -EINVAL; | ||
349 | 346 | ||
350 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | 347 | while (size) { |
351 | vm = current->mm->total_vm + pgsz; | 348 | unsigned long base, index, end, write_end, int_th; |
352 | if (rlim < vm) | 349 | unsigned long write_size, adj_write_size; |
353 | return NULL; | ||
354 | 350 | ||
355 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 351 | /* |
356 | vm = current->mm->locked_vm + pgsz; | 352 | * write as much as possible without producing an |
357 | if (rlim < vm) | 353 | * overflow interrupt. |
358 | return NULL; | 354 | * |
355 | * interrupt_threshold must either be | ||
356 | * - bigger than absolute_maximum or | ||
357 | * - point to a record between buffer_base and absolute_maximum | ||
358 | * | ||
359 | * index points to a valid record. | ||
360 | */ | ||
361 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
362 | index = ds_get(context->ds, qual, ds_index); | ||
363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
359 | 365 | ||
360 | buffer = kzalloc(size, GFP_KERNEL); | 366 | write_end = min(end, int_th); |
361 | if (!buffer) | ||
362 | return NULL; | ||
363 | 367 | ||
364 | current->mm->total_vm += pgsz; | 368 | /* if we are already beyond the interrupt threshold, |
365 | current->mm->locked_vm += pgsz; | 369 | * we fill the entire buffer */ |
370 | if (write_end <= index) | ||
371 | write_end = end; | ||
366 | 372 | ||
367 | if (pages) | 373 | if (write_end <= index) |
368 | *pages = pgsz; | 374 | break; |
375 | |||
376 | write_size = min((unsigned long) size, write_end - index); | ||
377 | memcpy((void *)index, record, write_size); | ||
369 | 378 | ||
370 | return buffer; | 379 | record = (const char *)record + write_size; |
380 | size -= write_size; | ||
381 | bytes_written += write_size; | ||
382 | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
385 | |||
386 | /* zero out trailing bytes */ | ||
387 | memset((char *)index + write_size, 0, | ||
388 | adj_write_size - write_size); | ||
389 | index += adj_write_size; | ||
390 | |||
391 | if (index >= end) | ||
392 | index = base; | ||
393 | ds_set(context->ds, qual, ds_index, index); | ||
394 | |||
395 | if (index >= int_th) | ||
396 | ds_overflow(context, qual); | ||
397 | } | ||
398 | |||
399 | return bytes_written; | ||
371 | } | 400 | } |
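A worked example of the wraparound arithmetic above may help (all numbers invented, and chosen so that writes land on whole records, as they do for the bts_write() caller below):

	/* sizeof_rec = 24, base = 0x1000, end = 0x1060 (4 records),
	 * int_th beyond end (no threshold), index = 0x1048, size = 48.
	 *
	 * Iteration 1:
	 *   write_end      = min(end, int_th) = 0x1060
	 *   write_size     = min(48, 0x1060 - 0x1048) = 24
	 *   adj_write_size = (24 / 24) * 24 = 24
	 *   index          = 0x1048 + 24 = 0x1060 >= end  ->  wraps to 0x1000
	 * Iteration 2:
	 *   write_size     = min(24, 0x1060 - 0x1000) = 24
	 *   index          = 0x1000 + 24 = 0x1018
	 *
	 * ds_write() returns 48; no overflow fired since int_th > end.
	 */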
372 | 401 | ||
373 | static int ds_request(struct task_struct *task, void *base, size_t size, | 402 | |
374 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | 403 | /* |
404 | * Branch Trace Store (BTS) uses the following format. Different | ||
405 | * architectures vary in the size of those fields. | ||
406 | * - source linear address | ||
407 | * - destination linear address | ||
408 | * - flags | ||
409 | * | ||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
411 | * architectures use 32bit pointers in 32bit mode. | ||
412 | * | ||
413 | * We compute the base address for the first 8 fields based on: | ||
414 | * - the field size stored in the DS configuration | ||
415 | * - the relative field position | ||
416 | * | ||
417 | * In order to store additional information in the BTS buffer, we use | ||
418 | * a special source address to indicate that the record requires | ||
419 | * special interpretation. | ||
420 | * | ||
421 | * On Netburst, a bit in the flags field indicated whether the | ||
422 | * branch was predicted; this is ignored. | ||
423 | * | ||
424 | * We use two levels of abstraction: | ||
425 | * - the raw data level defined here | ||
426 | * - an arch-independent level defined in ds.h | ||
427 | */ | ||
428 | |||
429 | enum bts_field { | ||
430 | bts_from, | ||
431 | bts_to, | ||
432 | bts_flags, | ||
433 | |||
434 | bts_qual = bts_from, | ||
435 | bts_jiffies = bts_to, | ||
436 | bts_pid = bts_flags, | ||
437 | |||
438 | bts_qual_mask = (bts_qual_max - 1), | ||
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
440 | }; | ||
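A sketch of the resulting escape encoding, assuming bts_qual_max is a power of two (which the mask arithmetic above requires):

	/* A task-departs record, for example, is laid out by bts_write()
	 * below as
	 *
	 *   from  = bts_escape | bts_task_departs   (all high bits set)
	 *   to    = jiffies snapshot
	 *   flags = pid
	 *
	 * The scheme relies on no real branch source having all the bits
	 * above bts_qual_mask set, so a reader can test
	 * (from & ~bts_qual_mask) == bts_escape to tell such records
	 * apart from ordinary branch records.
	 */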
441 | |||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
375 | { | 443 | { |
376 | struct ds_context *context; | 444 | base += (ds_cfg.sizeof_field * field); |
377 | unsigned long buffer, adj; | 445 | return *(unsigned long *)base; |
378 | const unsigned long alignment = (1 << 3); | 446 | } |
379 | unsigned long irq; | 447 | |
380 | int error = 0; | 448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
449 | { | ||
450 | base += (ds_cfg.sizeof_field * field); | ||
451 | (*(unsigned long *)base) = val; | ||
452 | } | ||
381 | 453 | ||
382 | if (!ds_cfg.sizeof_ds) | ||
383 | return -EOPNOTSUPP; | ||
384 | 454 | ||
385 | /* we require some space to do alignment adjustments below */ | 455 | /* |
386 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | 456 | * The raw BTS data is architecture dependent. |
457 | * | ||
458 | * For higher-level users, we give an arch-independent view. | ||
459 | * - ds.h defines struct bts_struct | ||
460 | * - bts_read translates one raw bts record into a bts_struct | ||
461 | * - bts_write translates one bts_struct into the raw format and | ||
462 | * writes it into the top of the parameter tracer's buffer. | ||
463 | * | ||
464 | * return: bytes read/written on success; -Eerrno, otherwise | ||
465 | */ | ||
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | ||
467 | struct bts_struct *out) | ||
468 | { | ||
469 | if (!tracer) | ||
387 | return -EINVAL; | 470 | return -EINVAL; |
388 | 471 | ||
389 | /* buffer overflow notification is not yet implemented */ | 472 | if (at < tracer->trace.ds.begin) |
390 | if (ovfl) | 473 | return -EINVAL; |
391 | return -EOPNOTSUPP; | ||
392 | 474 | ||
475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
476 | return -EINVAL; | ||
393 | 477 | ||
394 | context = ds_alloc_context(task); | 478 | memset(out, 0, sizeof(*out)); |
395 | if (!context) | 479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
396 | return -ENOMEM; | 480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | ||
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | ||
483 | } else { | ||
484 | out->qualifier = bts_branch; | ||
485 | out->variant.lbr.from = bts_get(at, bts_from); | ||
486 | out->variant.lbr.to = bts_get(at, bts_to); | ||
487 | |||
488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
489 | out->qualifier = bts_invalid; | ||
490 | } | ||
397 | 491 | ||
398 | spin_lock_irqsave(&ds_lock, irq); | 492 | return ds_cfg.sizeof_rec[ds_bts]; |
493 | } | ||
399 | 494 | ||
400 | error = -EPERM; | 495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) |
401 | if (!check_tracer(task)) | 496 | { |
402 | goto out_unlock; | 497 | unsigned char raw[MAX_SIZEOF_BTS]; |
403 | 498 | ||
404 | get_tracer(task); | 499 | if (!tracer) |
500 | return -EINVAL; | ||
405 | 501 | ||
406 | error = -EALREADY; | 502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) |
407 | if (context->owner[qual] == current) | 503 | return -EOVERFLOW; |
408 | goto out_put_tracer; | ||
409 | error = -EPERM; | ||
410 | if (context->owner[qual] != NULL) | ||
411 | goto out_put_tracer; | ||
412 | context->owner[qual] = current; | ||
413 | 504 | ||
414 | spin_unlock_irqrestore(&ds_lock, irq); | 505 | switch (in->qualifier) { |
506 | case bts_invalid: | ||
507 | bts_set(raw, bts_from, 0); | ||
508 | bts_set(raw, bts_to, 0); | ||
509 | bts_set(raw, bts_flags, 0); | ||
510 | break; | ||
511 | case bts_branch: | ||
512 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
513 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
514 | bts_set(raw, bts_flags, 0); | ||
515 | break; | ||
516 | case bts_task_arrives: | ||
517 | case bts_task_departs: | ||
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | ||
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | ||
521 | break; | ||
522 | default: | ||
523 | return -EINVAL; | ||
524 | } | ||
415 | 525 | ||
526 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
527 | ds_cfg.sizeof_rec[ds_bts]); | ||
528 | } | ||
416 | 529 | ||
417 | error = -ENOMEM; | ||
418 | if (!base) { | ||
419 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
420 | if (!base) | ||
421 | goto out_release; | ||
422 | 530 | ||
423 | context->buffer[qual] = base; | 531 | static void ds_write_config(struct ds_context *context, |
424 | } | 532 | struct ds_trace *cfg, enum ds_qualifier qual) |
425 | error = 0; | 533 | { |
534 | unsigned char *ds = context->ds; | ||
535 | |||
536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
540 | } | ||
541 | |||
542 | static void ds_read_config(struct ds_context *context, | ||
543 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
544 | { | ||
545 | unsigned char *ds = context->ds; | ||
426 | 546 | ||
427 | context->callback[qual] = ovfl; | 547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); |
548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
551 | } | ||
552 | |||
553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
554 | void *base, size_t size, size_t ith, | ||
555 | unsigned int flags) { | ||
556 | unsigned long buffer, adj; | ||
428 | 557 | ||
429 | /* adjust the buffer address and size to meet alignment | 558 | /* adjust the buffer address and size to meet alignment |
430 | * constraints: | 559 | * constraints: |
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size, | |||
436 | */ | 565 | */ |
437 | buffer = (unsigned long)base; | 566 | buffer = (unsigned long)base; |
438 | 567 | ||
439 | adj = ALIGN(buffer, alignment) - buffer; | 568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; |
440 | buffer += adj; | 569 | buffer += adj; |
441 | size -= adj; | 570 | size -= adj; |
442 | 571 | ||
443 | size /= ds_cfg.sizeof_rec[qual]; | 572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
444 | size *= ds_cfg.sizeof_rec[qual]; | 573 | trace->size = ds_cfg.sizeof_rec[qual]; |
445 | |||
446 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
447 | ds_set(context->ds, qual, ds_index, buffer); | ||
448 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
449 | 574 | ||
450 | if (ovfl) { | 575 | size = (trace->n * trace->size); |
451 | /* todo: select a suitable interrupt threshold */ | ||
452 | } else | ||
453 | ds_set(context->ds, qual, | ||
454 | ds_interrupt_threshold, buffer + size + 1); | ||
455 | 576 | ||
456 | /* we keep the context until ds_release */ | 577 | trace->begin = (void *)buffer; |
457 | return error; | 578 | trace->top = trace->begin; |
458 | 579 | trace->end = (void *)(buffer + size); | |
459 | out_release: | 580 | /* The value for 'no threshold' is -1, which will set the |
460 | context->owner[qual] = NULL; | 581 | * threshold outside of the buffer, just like we want it. |
461 | ds_put_context(context); | 582 | */ |
462 | put_tracer(task); | 583 | trace->ith = (void *)(buffer + size - ith); |
463 | return error; | ||
464 | |||
465 | out_put_tracer: | ||
466 | spin_unlock_irqrestore(&ds_lock, irq); | ||
467 | ds_put_context(context); | ||
468 | put_tracer(task); | ||
469 | return error; | ||
470 | 584 | ||
471 | out_unlock: | 585 | trace->flags = flags; |
472 | spin_unlock_irqrestore(&ds_lock, irq); | ||
473 | ds_put_context(context); | ||
474 | return error; | ||
475 | } | 586 | } |
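A quick numeric walk-through of the adjustment above (numbers invented; the ith parameter arrives here already converted to bytes by ds_request() below):

	/* base = 0x1003, size = 256, DS_ALIGNMENT = 8, sizeof_rec = 24,
	 * ith = 48 (two records):
	 *
	 *   adj    = ALIGN(0x1003, 8) - 0x1003 = 5
	 *   buffer = 0x1008, size = 251
	 *   n      = 251 / 24 = 10 records, size = 10 * 24 = 240
	 *   begin  = 0x1008, end = 0x10f8
	 *   ith    = 0x10f8 - 48 = 0x10c8  (two records below end)
	 *
	 * With the 'no threshold' value of -1, buffer + size - (-1) is
	 * buffer + size + 1, one byte past the buffer, so the threshold
	 * can never be reached.
	 */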
476 | 587 | ||
477 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
478 | ds_ovfl_callback_t ovfl) | ||
479 | { | ||
480 | return ds_request(task, base, size, ovfl, ds_bts); | ||
481 | } | ||
482 | 588 | ||
483 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | 589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
484 | ds_ovfl_callback_t ovfl) | 590 | enum ds_qualifier qual, struct task_struct *task, |
485 | { | 591 | void *base, size_t size, size_t th, unsigned int flags) |
486 | return ds_request(task, base, size, ovfl, ds_pebs); | ||
487 | } | ||
488 | |||
489 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
490 | { | 592 | { |
491 | struct ds_context *context; | 593 | struct ds_context *context; |
492 | int error; | 594 | int error; |
493 | 595 | ||
494 | context = ds_get_context(task); | 596 | error = -EINVAL; |
495 | error = ds_validate_access(context, qual); | 597 | if (!base) |
496 | if (error < 0) | ||
497 | goto out; | 598 | goto out; |
498 | 599 | ||
499 | kfree(context->buffer[qual]); | 600 | /* we require some space to do alignment adjustments below */ |
500 | context->buffer[qual] = NULL; | 601 | error = -EINVAL; |
501 | 602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
502 | current->mm->total_vm -= context->pages[qual]; | 603 | goto out; |
503 | current->mm->locked_vm -= context->pages[qual]; | ||
504 | context->pages[qual] = 0; | ||
505 | context->owner[qual] = NULL; | ||
506 | |||
507 | /* | ||
508 | * we put the context twice: | ||
509 | * once for the ds_get_context | ||
510 | * once for the corresponding ds_request | ||
511 | */ | ||
512 | ds_put_context(context); | ||
513 | out: | ||
514 | ds_put_context(context); | ||
515 | return error; | ||
516 | } | ||
517 | 604 | ||
518 | int ds_release_bts(struct task_struct *task) | 605 | if (th != (size_t)-1) { |
519 | { | 606 | th *= ds_cfg.sizeof_rec[qual]; |
520 | return ds_release(task, ds_bts); | ||
521 | } | ||
522 | 607 | ||
523 | int ds_release_pebs(struct task_struct *task) | 608 | error = -EINVAL; |
524 | { | 609 | if (size <= th) |
525 | return ds_release(task, ds_pebs); | 610 | goto out; |
526 | } | 611 | } |
527 | 612 | ||
528 | static int ds_get_index(struct task_struct *task, size_t *pos, | 613 | tracer->buffer = base; |
529 | enum ds_qualifier qual) | 614 | tracer->size = size; |
530 | { | ||
531 | struct ds_context *context; | ||
532 | unsigned long base, index; | ||
533 | int error; | ||
534 | 615 | ||
616 | error = -ENOMEM; | ||
535 | context = ds_get_context(task); | 617 | context = ds_get_context(task); |
536 | error = ds_validate_access(context, qual); | 618 | if (!context) |
537 | if (error < 0) | ||
538 | goto out; | 619 | goto out; |
620 | tracer->context = context; | ||
539 | 621 | ||
540 | base = ds_get(context->ds, qual, ds_buffer_base); | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
541 | index = ds_get(context->ds, qual, ds_index); | ||
542 | 623 | ||
543 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | 624 | error = 0; |
544 | if (pos) | ||
545 | *pos = error; | ||
546 | out: | 625 | out: |
547 | ds_put_context(context); | ||
548 | return error; | 626 | return error; |
549 | } | 627 | } |
550 | 628 | ||
551 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | 629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
552 | { | 630 | void *base, size_t size, |
553 | return ds_get_index(task, pos, ds_bts); | 631 | bts_ovfl_callback_t ovfl, size_t th, |
554 | } | 632 | unsigned int flags) |
555 | |||
556 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
557 | { | 633 | { |
558 | return ds_get_index(task, pos, ds_pebs); | 634 | struct bts_tracer *tracer; |
559 | } | 635 | unsigned long irq; |
560 | |||
561 | static int ds_get_end(struct task_struct *task, size_t *pos, | ||
562 | enum ds_qualifier qual) | ||
563 | { | ||
564 | struct ds_context *context; | ||
565 | unsigned long base, end; | ||
566 | int error; | 636 | int error; |
567 | 637 | ||
568 | context = ds_get_context(task); | 638 | error = -EOPNOTSUPP; |
569 | error = ds_validate_access(context, qual); | 639 | if (!ds_cfg.ctl[dsf_bts]) |
570 | if (error < 0) | ||
571 | goto out; | 640 | goto out; |
572 | 641 | ||
573 | base = ds_get(context->ds, qual, ds_buffer_base); | 642 | /* buffer overflow notification is not yet implemented */ |
574 | end = ds_get(context->ds, qual, ds_absolute_maximum); | 643 | error = -EOPNOTSUPP; |
644 | if (ovfl) | ||
645 | goto out; | ||
575 | 646 | ||
576 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | 647 | error = -ENOMEM; |
577 | if (pos) | 648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
578 | *pos = error; | 649 | if (!tracer) |
579 | out: | 650 | goto out; |
580 | ds_put_context(context); | 651 | tracer->ovfl = ovfl; |
581 | return error; | ||
582 | } | ||
583 | 652 | ||
584 | int ds_get_bts_end(struct task_struct *task, size_t *pos) | 653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
585 | { | 654 | ds_bts, task, base, size, th, flags); |
586 | return ds_get_end(task, pos, ds_bts); | 655 | if (error < 0) |
587 | } | 656 | goto out_tracer; |
588 | 657 | ||
589 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) | ||
590 | { | ||
591 | return ds_get_end(task, pos, ds_pebs); | ||
592 | } | ||
593 | 658 | ||
594 | static int ds_access(struct task_struct *task, size_t index, | 659 | spin_lock_irqsave(&ds_lock, irq); |
595 | const void **record, enum ds_qualifier qual) | ||
596 | { | ||
597 | struct ds_context *context; | ||
598 | unsigned long base, idx; | ||
599 | int error; | ||
600 | 660 | ||
601 | if (!record) | 661 | error = -EPERM; |
602 | return -EINVAL; | 662 | if (!check_tracer(task)) |
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
603 | 665 | ||
604 | context = ds_get_context(task); | 666 | error = -EPERM; |
605 | error = ds_validate_access(context, qual); | 667 | if (tracer->ds.context->bts_master) |
606 | if (error < 0) | 668 | goto out_put_tracer; |
607 | goto out; | 669 | tracer->ds.context->bts_master = tracer; |
608 | 670 | ||
609 | base = ds_get(context->ds, qual, ds_buffer_base); | 671 | spin_unlock_irqrestore(&ds_lock, irq); |
610 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
611 | 672 | ||
612 | error = -EINVAL; | ||
613 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
614 | goto out; | ||
615 | 673 | ||
616 | *record = (const void *)idx; | 674 | tracer->trace.read = bts_read; |
617 | error = ds_cfg.sizeof_rec[qual]; | 675 | tracer->trace.write = bts_write; |
618 | out: | ||
619 | ds_put_context(context); | ||
620 | return error; | ||
621 | } | ||
622 | 676 | ||
623 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) | 677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
624 | { | 678 | ds_resume_bts(tracer); |
625 | return ds_access(task, index, record, ds_bts); | ||
626 | } | ||
627 | 679 | ||
628 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) | 680 | return tracer; |
629 | { | 681 | |
630 | return ds_access(task, index, record, ds_pebs); | 682 | out_put_tracer: |
683 | put_tracer(task); | ||
684 | out_unlock: | ||
685 | spin_unlock_irqrestore(&ds_lock, irq); | ||
686 | ds_put_context(tracer->ds.context); | ||
687 | out_tracer: | ||
688 | kfree(tracer); | ||
689 | out: | ||
690 | return ERR_PTR(error); | ||
631 | } | 691 | } |
632 | 692 | ||
633 | static int ds_write(struct task_struct *task, const void *record, size_t size, | 693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
634 | enum ds_qualifier qual, int force) | 694 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | ||
696 | unsigned int flags) | ||
635 | { | 697 | { |
636 | struct ds_context *context; | 698 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
637 | int error; | 700 | int error; |
638 | 701 | ||
639 | if (!record) | 702 | /* buffer overflow notification is not yet implemented */ |
640 | return -EINVAL; | 703 | error = -EOPNOTSUPP; |
704 | if (ovfl) | ||
705 | goto out; | ||
641 | 706 | ||
642 | error = -EPERM; | 707 | error = -ENOMEM; |
643 | context = ds_get_context(task); | 708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
644 | if (!context) | 709 | if (!tracer) |
645 | goto out; | 710 | goto out; |
711 | tracer->ovfl = ovfl; | ||
646 | 712 | ||
647 | if (!force) { | 713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
648 | error = ds_validate_access(context, qual); | 714 | ds_pebs, task, base, size, th, flags); |
649 | if (error < 0) | 715 | if (error < 0) |
650 | goto out; | 716 | goto out_tracer; |
651 | } | ||
652 | 717 | ||
653 | error = 0; | 718 | spin_lock_irqsave(&ds_lock, irq); |
654 | while (size) { | ||
655 | unsigned long base, index, end, write_end, int_th; | ||
656 | unsigned long write_size, adj_write_size; | ||
657 | 719 | ||
658 | /* | 720 | error = -EPERM; |
659 | * write as much as possible without producing an | 721 | if (!check_tracer(task)) |
660 | * overflow interrupt. | 722 | goto out_unlock; |
661 | * | 723 | get_tracer(task); |
662 | * interrupt_threshold must either be | ||
663 | * - bigger than absolute_maximum or | ||
664 | * - point to a record between buffer_base and absolute_maximum | ||
665 | * | ||
666 | * index points to a valid record. | ||
667 | */ | ||
668 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
669 | index = ds_get(context->ds, qual, ds_index); | ||
670 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
671 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
672 | 724 | ||
673 | write_end = min(end, int_th); | 725 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | ||
727 | goto out_put_tracer; | ||
728 | tracer->ds.context->pebs_master = tracer; | ||
674 | 729 | ||
675 | /* if we are already beyond the interrupt threshold, | 730 | spin_unlock_irqrestore(&ds_lock, irq); |
676 | * we fill the entire buffer */ | ||
677 | if (write_end <= index) | ||
678 | write_end = end; | ||
679 | 731 | ||
680 | if (write_end <= index) | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
681 | goto out; | 733 | ds_resume_pebs(tracer); |
682 | 734 | ||
683 | write_size = min((unsigned long) size, write_end - index); | 735 | return tracer; |
684 | memcpy((void *)index, record, write_size); | ||
685 | 736 | ||
686 | record = (const char *)record + write_size; | 737 | out_put_tracer: |
687 | size -= write_size; | 738 | put_tracer(task); |
688 | error += write_size; | 739 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | ||
741 | ds_put_context(tracer->ds.context); | ||
742 | out_tracer: | ||
743 | kfree(tracer); | ||
744 | out: | ||
745 | return ERR_PTR(error); | ||
746 | } | ||
689 | 747 | ||
690 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 748 | void ds_release_bts(struct bts_tracer *tracer) |
691 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 749 | { |
750 | if (!tracer) | ||
751 | return; | ||
692 | 752 | ||
693 | /* zero out trailing bytes */ | 753 | ds_suspend_bts(tracer); |
694 | memset((char *)index + write_size, 0, | ||
695 | adj_write_size - write_size); | ||
696 | index += adj_write_size; | ||
697 | 754 | ||
698 | if (index >= end) | 755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
699 | index = base; | 756 | tracer->ds.context->bts_master = NULL; |
700 | ds_set(context->ds, qual, ds_index, index); | ||
701 | 757 | ||
702 | if (index >= int_th) | 758 | put_tracer(tracer->ds.context->task); |
703 | ds_overflow(task, context, qual); | 759 | ds_put_context(tracer->ds.context); |
704 | } | ||
705 | 760 | ||
706 | out: | 761 | kfree(tracer); |
707 | ds_put_context(context); | ||
708 | return error; | ||
709 | } | 762 | } |
710 | 763 | ||
711 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
712 | { | 765 | { |
713 | return ds_write(task, record, size, ds_bts, /* force = */ 0); | 766 | struct task_struct *task; |
714 | } | ||
715 | 767 | ||
716 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) | 768 | if (!tracer) |
717 | { | 769 | return; |
718 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); | ||
719 | } | ||
720 | 770 | ||
721 | int ds_unchecked_write_bts(struct task_struct *task, | 771 | task = tracer->ds.context->task; |
722 | const void *record, size_t size) | ||
723 | { | ||
724 | return ds_write(task, record, size, ds_bts, /* force = */ 1); | ||
725 | } | ||
726 | 772 | ||
727 | int ds_unchecked_write_pebs(struct task_struct *task, | 773 | if (!task || (task == current)) |
728 | const void *record, size_t size) | 774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); |
729 | { | 775 | |
730 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); | 776 | if (task) { |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | ||
778 | |||
779 | if (!task->thread.debugctlmsr) | ||
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
781 | } | ||
731 | } | 782 | } |
732 | 783 | ||
733 | static int ds_reset_or_clear(struct task_struct *task, | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
734 | enum ds_qualifier qual, int clear) | ||
735 | { | 785 | { |
736 | struct ds_context *context; | 786 | struct task_struct *task; |
737 | unsigned long base, end; | 787 | unsigned long control; |
738 | int error; | ||
739 | 788 | ||
740 | context = ds_get_context(task); | 789 | if (!tracer) |
741 | error = ds_validate_access(context, qual); | 790 | return; |
742 | if (error < 0) | ||
743 | goto out; | ||
744 | 791 | ||
745 | base = ds_get(context->ds, qual, ds_buffer_base); | 792 | task = tracer->ds.context->task; |
746 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
747 | 793 | ||
748 | if (clear) | 794 | control = ds_cfg.ctl[dsf_bts]; |
749 | memset((void *)base, 0, end - base); | 795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
798 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
750 | 799 | ||
751 | ds_set(context->ds, qual, ds_index, base); | 800 | if (task) { |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
752 | 804 | ||
753 | error = 0; | 805 | if (!task || (task == current)) |
754 | out: | 806 | update_debugctlmsr(get_debugctlmsr() | control); |
755 | ds_put_context(context); | ||
756 | return error; | ||
757 | } | 807 | } |
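Note the inverted logic above: the dsf_bts_kernel and dsf_bts_user masks name hardware suppress bits, so they are OR-ed in when the corresponding BTS_KERNEL/BTS_USER flag is absent. A sketch of the value composed for a core-2 class CPU (bit names as in the SDM's IA32_DEBUGCTL description):

	/* Tracer requested with flags == BTS_KERNEL (trace kernel only):
	 *
	 *   control  = ds_cfg.ctl[dsf_bts];      // TR | BTS = (1<<6) | (1<<7)
	 *   control |= ds_cfg.ctl[dsf_bts_user]; // BTS_OFF_USR = (1<<10)
	 *
	 * i.e. branch tracing is enabled globally, then suppressed for
	 * ring 3 via the hardware "off" bit.
	 */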
758 | 808 | ||
759 | int ds_reset_bts(struct task_struct *task) | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
760 | { | 810 | { |
761 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); | 811 | if (!tracer) |
812 | return; | ||
813 | |||
814 | ds_suspend_pebs(tracer); | ||
815 | |||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
817 | tracer->ds.context->pebs_master = NULL; | ||
818 | |||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | ||
821 | |||
822 | kfree(tracer); | ||
762 | } | 823 | } |
763 | 824 | ||
764 | int ds_reset_pebs(struct task_struct *task) | 825 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
765 | { | 826 | { |
766 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | 827 | |
767 | } | 828 | } |
768 | 829 | ||
769 | int ds_clear_bts(struct task_struct *task) | 830 | void ds_resume_pebs(struct pebs_tracer *tracer) |
770 | { | 831 | { |
771 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); | 832 | |
772 | } | 833 | } |
773 | 834 | ||
774 | int ds_clear_pebs(struct task_struct *task) | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
775 | { | 836 | { |
776 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); | 837 | if (!tracer) |
838 | return NULL; | ||
839 | |||
840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
841 | return &tracer->trace; | ||
777 | } | 842 | } |
778 | 843 | ||
779 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
780 | { | 845 | { |
781 | struct ds_context *context; | 846 | if (!tracer) |
782 | int error; | 847 | return NULL; |
848 | |||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
850 | tracer->trace.reset_value = | ||
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | ||
783 | 852 | ||
784 | if (!value) | 853 | return &tracer->trace; |
854 | } | ||
855 | |||
856 | int ds_reset_bts(struct bts_tracer *tracer) | ||
857 | { | ||
858 | if (!tracer) | ||
785 | return -EINVAL; | 859 | return -EINVAL; |
786 | 860 | ||
787 | context = ds_get_context(task); | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
788 | error = ds_validate_access(context, ds_pebs); | ||
789 | if (error < 0) | ||
790 | goto out; | ||
791 | 862 | ||
792 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); | 863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
864 | (unsigned long)tracer->trace.ds.top); | ||
793 | 865 | ||
794 | error = 0; | 866 | return 0; |
795 | out: | ||
796 | ds_put_context(context); | ||
797 | return error; | ||
798 | } | 867 | } |
799 | 868 | ||
800 | int ds_set_pebs_reset(struct task_struct *task, u64 value) | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
801 | { | 870 | { |
802 | struct ds_context *context; | 871 | if (!tracer) |
803 | int error; | 872 | return -EINVAL; |
804 | 873 | ||
805 | context = ds_get_context(task); | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
806 | error = ds_validate_access(context, ds_pebs); | ||
807 | if (error < 0) | ||
808 | goto out; | ||
809 | 875 | ||
810 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; | 876 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | ||
811 | 878 | ||
812 | error = 0; | 879 | return 0; |
813 | out: | 880 | } |
814 | ds_put_context(context); | 881 | |
815 | return error; | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
883 | { | ||
884 | if (!tracer) | ||
885 | return -EINVAL; | ||
886 | |||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | ||
888 | |||
889 | return 0; | ||
816 | } | 890 | } |
817 | 891 | ||
818 | static const struct ds_configuration ds_cfg_var = { | 892 | static const struct ds_configuration ds_cfg_netburst = { |
819 | .sizeof_ds = sizeof(long) * 12, | 893 | .name = "netburst", |
820 | .sizeof_field = sizeof(long), | 894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
821 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | 895 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | ||
897 | |||
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
822 | #ifdef __i386__ | 900 | #ifdef __i386__ |
823 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | 901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
824 | #else | 902 | #else |
825 | .sizeof_rec[ds_pebs] = sizeof(long) * 18 | 903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
826 | #endif | 904 | #endif |
827 | }; | 905 | }; |
828 | static const struct ds_configuration ds_cfg_64 = { | 906 | static const struct ds_configuration ds_cfg_pentium_m = { |
829 | .sizeof_ds = 8 * 12, | 907 | .name = "pentium m", |
830 | .sizeof_field = 8, | 908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
831 | .sizeof_rec[ds_bts] = 8 * 3, | 909 | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
832 | #ifdef __i386__ | 912 | #ifdef __i386__ |
833 | .sizeof_rec[ds_pebs] = 8 * 10 | 913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
834 | #else | 914 | #else |
835 | .sizeof_rec[ds_pebs] = 8 * 18 | 915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
836 | #endif | 916 | #endif |
837 | }; | 917 | }; |
918 | static const struct ds_configuration ds_cfg_core2 = { | ||
919 | .name = "core 2", | ||
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
921 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
922 | .ctl[dsf_bts_user] = (1 << 10), | ||
923 | |||
924 | .sizeof_field = 8, | ||
925 | .sizeof_rec[ds_bts] = 8 * 3, | ||
926 | .sizeof_rec[ds_pebs] = 8 * 18, | ||
927 | }; | ||
838 | 928 | ||
839 | static inline void | 929 | static void |
840 | ds_configure(const struct ds_configuration *cfg) | 930 | ds_configure(const struct ds_configuration *cfg) |
841 | { | 931 | { |
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
842 | ds_cfg = *cfg; | 933 | ds_cfg = *cfg; |
934 | |||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | ||
936 | |||
937 | if (!cpu_has_bts) { | ||
938 | ds_cfg.ctl[dsf_bts] = 0; | ||
939 | printk(KERN_INFO "[ds] bts not available\n"); | ||
940 | } | ||
941 | if (!cpu_has_pebs) | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
943 | |||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | ||
843 | } | 945 | } |
844 | 946 | ||
845 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
@@ -847,16 +949,15 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
847 | switch (c->x86) { | 949 | switch (c->x86) { |
848 | case 0x6: | 950 | case 0x6: |
849 | switch (c->x86_model) { | 951 | switch (c->x86_model) { |
952 | case 0 ... 0xC: | ||
953 | /* sorry, don't know about them */ | ||
954 | break; | ||
850 | case 0xD: | 955 | case 0xD: |
851 | case 0xE: /* Pentium M */ | 956 | case 0xE: /* Pentium M */ |
852 | ds_configure(&ds_cfg_var); | 957 | ds_configure(&ds_cfg_pentium_m); |
853 | break; | 958 | break; |
854 | case 0xF: /* Core2 */ | 959 | default: /* Core2, Atom, ... */ |
855 | case 0x1C: /* Atom */ | 960 | ds_configure(&ds_cfg_core2); |
856 | ds_configure(&ds_cfg_64); | ||
857 | break; | ||
858 | default: | ||
859 | /* sorry, don't know about them */ | ||
860 | break; | 961 | break; |
861 | } | 962 | } |
862 | break; | 963 | break; |
@@ -865,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
865 | case 0x0: | 966 | case 0x0: |
866 | case 0x1: | 967 | case 0x1: |
867 | case 0x2: /* Netburst */ | 968 | case 0x2: /* Netburst */ |
868 | ds_configure(&ds_cfg_var); | 969 | ds_configure(&ds_cfg_netburst); |
869 | break; | 970 | break; |
870 | default: | 971 | default: |
871 | /* sorry, don't know about them */ | 972 | /* sorry, don't know about them */ |
@@ -878,12 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
878 | } | 979 | } |
879 | } | 980 | } |
880 | 981 | ||
881 | void ds_free(struct ds_context *context) | 982 | /* |
983 | * Change the DS configuration from tracing prev to tracing next. | ||
984 | */ | ||
985 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
986 | { | ||
987 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
988 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
989 | |||
990 | if (prev_ctx) { | ||
991 | update_debugctlmsr(0); | ||
992 | |||
993 | if (prev_ctx->bts_master && | ||
994 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
995 | struct bts_struct ts = { | ||
996 | .qualifier = bts_task_departs, | ||
997 | .variant.timestamp.jiffies = jiffies_64, | ||
998 | .variant.timestamp.pid = prev->pid | ||
999 | }; | ||
1000 | bts_write(prev_ctx->bts_master, &ts); | ||
1001 | } | ||
1002 | } | ||
1003 | |||
1004 | if (next_ctx) { | ||
1005 | if (next_ctx->bts_master && | ||
1006 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1007 | struct bts_struct ts = { | ||
1008 | .qualifier = bts_task_arrives, | ||
1009 | .variant.timestamp.jiffies = jiffies_64, | ||
1010 | .variant.timestamp.pid = next->pid | ||
1011 | }; | ||
1012 | bts_write(next_ctx->bts_master, &ts); | ||
1013 | } | ||
1014 | |||
1015 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1016 | } | ||
1017 | |||
1018 | update_debugctlmsr(next->thread.debugctlmsr); | ||
1019 | } | ||
1020 | |||
1021 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | ||
1022 | { | ||
1023 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | ||
1024 | tsk->thread.ds_ctx = NULL; | ||
1025 | } | ||
1026 | |||
1027 | void ds_exit_thread(struct task_struct *tsk) | ||
882 | { | 1028 | { |
883 | /* This is called when the task owning the parameter context | 1029 | WARN_ON(tsk->thread.ds_ctx); |
884 | * is dying. There should not be any user of that context left | ||
885 | * to disturb us, anymore. */ | ||
886 | unsigned long leftovers = context->count; | ||
887 | while (leftovers--) | ||
888 | ds_put_context(context); | ||
889 | } | 1030 | } |