about summary refs log tree commit diff stats
path: root/arch/x86/kernel/ds.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r-- arch/x86/kernel/ds.c 921
1 files changed, 663 insertions, 258 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 87b67e3a765a..48bfe1386038 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -19,45 +19,61 @@
19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 19 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
20 */ 20 */
21 21
22 22#include <linux/kernel.h>
23#include <asm/ds.h>
24
25#include <linux/errno.h>
26#include <linux/string.h> 23#include <linux/string.h>
27#include <linux/slab.h> 24#include <linux/errno.h>
28#include <linux/sched.h> 25#include <linux/sched.h>
26#include <linux/slab.h>
29#include <linux/mm.h> 27#include <linux/mm.h>
30#include <linux/kernel.h> 28#include <linux/trace_clock.h>
29
30#include <asm/ds.h>
31 31
32#include "ds_selftest.h"
32 33
33/* 34/*
34 * The configuration for a particular DS hardware implementation. 35 * The configuration for a particular DS hardware implementation:
35 */ 36 */
36struct ds_configuration { 37struct ds_configuration {
37 /* the name of the configuration */ 38 /* The name of the configuration: */
38 const char *name; 39 const char *name;
39 /* the size of one pointer-typed field in the DS structure and 40
40 in the BTS and PEBS buffers in bytes; 41 /* The size of pointer-typed fields in DS, BTS, and PEBS: */
41 this covers the first 8 DS fields related to buffer management. */ 42 unsigned char sizeof_ptr_field;
42 unsigned char sizeof_field; 43
43 /* the size of a BTS/PEBS record in bytes */ 44 /* The size of a BTS/PEBS record in bytes: */
44 unsigned char sizeof_rec[2]; 45 unsigned char sizeof_rec[2];
45 /* a series of bit-masks to control various features indexed 46
46 * by enum ds_feature */ 47 /* The number of pebs counter reset values in the DS structure. */
47 unsigned long ctl[dsf_ctl_max]; 48 unsigned char nr_counter_reset;
49
50 /* Control bit-masks indexed by enum ds_feature: */
51 unsigned long ctl[dsf_ctl_max];
48}; 52};
49static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); 53static struct ds_configuration ds_cfg __read_mostly;
54
55
56/* Maximal size of a DS configuration: */
57#define MAX_SIZEOF_DS 0x80
50 58
51#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) 59/* Maximal size of a BTS record: */
60#define MAX_SIZEOF_BTS (3 * 8)
52 61
53#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ 62/* BTS and PEBS buffer alignment: */
54#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ 63#define DS_ALIGNMENT (1 << 3)
55#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
56 64
57#define BTS_CONTROL \ 65/* Number of buffer pointers in DS: */
58 (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ 66#define NUM_DS_PTR_FIELDS 8
59 ds_cfg.ctl[dsf_bts_overflow])
60 67
68/* Size of a pebs reset value in DS: */
69#define PEBS_RESET_FIELD_SIZE 8
70
71/* Mask of control bits in the DS MSR register: */
72#define BTS_CONTROL \
73 ( ds_cfg.ctl[dsf_bts] | \
74 ds_cfg.ctl[dsf_bts_kernel] | \
75 ds_cfg.ctl[dsf_bts_user] | \
76 ds_cfg.ctl[dsf_bts_overflow] )
61 77
62/* 78/*
63 * A BTS or PEBS tracer. 79 * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
66 * to identify tracers. 82 * to identify tracers.
67 */ 83 */
68struct ds_tracer { 84struct ds_tracer {
69 /* the DS context (partially) owned by this tracer */ 85 /* The DS context (partially) owned by this tracer. */
70 struct ds_context *context; 86 struct ds_context *context;
71 /* the buffer provided on ds_request() and its size in bytes */ 87 /* The buffer provided on ds_request() and its size in bytes. */
72 void *buffer; 88 void *buffer;
73 size_t size; 89 size_t size;
74}; 90};
75 91
76struct bts_tracer { 92struct bts_tracer {
77 /* the common DS part */ 93 /* The common DS part: */
78 struct ds_tracer ds; 94 struct ds_tracer ds;
79 /* the trace including the DS configuration */ 95
80 struct bts_trace trace; 96 /* The trace including the DS configuration: */
81 /* buffer overflow notification function */ 97 struct bts_trace trace;
82 bts_ovfl_callback_t ovfl; 98
99 /* Buffer overflow notification function: */
100 bts_ovfl_callback_t ovfl;
101
102 /* Active flags affecting trace collection. */
103 unsigned int flags;
83}; 104};
84 105
85struct pebs_tracer { 106struct pebs_tracer {
86 /* the common DS part */ 107 /* The common DS part: */
87 struct ds_tracer ds; 108 struct ds_tracer ds;
88 /* the trace including the DS configuration */ 109
89 struct pebs_trace trace; 110 /* The trace including the DS configuration: */
90 /* buffer overflow notification function */ 111 struct pebs_trace trace;
91 pebs_ovfl_callback_t ovfl; 112
113 /* Buffer overflow notification function: */
114 pebs_ovfl_callback_t ovfl;
92}; 115};
93 116
94/* 117/*
@@ -97,6 +120,7 @@ struct pebs_tracer {
97 * 120 *
98 * The DS configuration consists of the following fields; different 121 * The DS configuration consists of the following fields; different
99 * architectures vary in the size of those fields. 122 * architectures vary in the size of those fields.
123 *
100 * - double-word aligned base linear address of the BTS buffer 124 * - double-word aligned base linear address of the BTS buffer
101 * - write pointer into the BTS buffer 125 * - write pointer into the BTS buffer
102 * - end linear address of the BTS buffer (one byte beyond the end of 126 * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@ enum ds_field {
135}; 159};
136 160
137enum ds_qualifier { 161enum ds_qualifier {
138 ds_bts = 0, 162 ds_bts = 0,
139 ds_pebs 163 ds_pebs
140}; 164};
141 165
142static inline unsigned long ds_get(const unsigned char *base, 166static inline unsigned long
143 enum ds_qualifier qual, enum ds_field field) 167ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
144{ 168{
145 base += (ds_cfg.sizeof_field * (field + (4 * qual))); 169 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
146 return *(unsigned long *)base; 170 return *(unsigned long *)base;
147} 171}
148 172
149static inline void ds_set(unsigned char *base, enum ds_qualifier qual, 173static inline void
150 enum ds_field field, unsigned long value) 174ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
175 unsigned long value)
151{ 176{
152 base += (ds_cfg.sizeof_field * (field + (4 * qual))); 177 base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
153 (*(unsigned long *)base) = value; 178 (*(unsigned long *)base) = value;
154} 179}
155 180
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
159 */ 184 */
160static DEFINE_SPINLOCK(ds_lock); 185static DEFINE_SPINLOCK(ds_lock);
161 186
162
163/* 187/*
164 * We either support (system-wide) per-cpu or per-thread allocation. 188 * We either support (system-wide) per-cpu or per-thread allocation.
165 * We distinguish the two based on the task_struct pointer, where a 189 * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
178 */ 202 */
179static atomic_t tracers = ATOMIC_INIT(0); 203static atomic_t tracers = ATOMIC_INIT(0);
180 204
181static inline void get_tracer(struct task_struct *task) 205static inline int get_tracer(struct task_struct *task)
182{ 206{
183 if (task) 207 int error;
208
209 spin_lock_irq(&ds_lock);
210
211 if (task) {
212 error = -EPERM;
213 if (atomic_read(&tracers) < 0)
214 goto out;
184 atomic_inc(&tracers); 215 atomic_inc(&tracers);
185 else 216 } else {
217 error = -EPERM;
218 if (atomic_read(&tracers) > 0)
219 goto out;
186 atomic_dec(&tracers); 220 atomic_dec(&tracers);
221 }
222
223 error = 0;
224out:
225 spin_unlock_irq(&ds_lock);
226 return error;
187} 227}
188 228
189static inline void put_tracer(struct task_struct *task) 229static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
194 atomic_inc(&tracers); 234 atomic_inc(&tracers);
195} 235}
196 236
197static inline int check_tracer(struct task_struct *task)
198{
199 return task ?
200 (atomic_read(&tracers) >= 0) :
201 (atomic_read(&tracers) <= 0);
202}
203
204
205/* 237/*
206 * The DS context is either attached to a thread or to a cpu: 238 * The DS context is either attached to a thread or to a cpu:
207 * - in the former case, the thread_struct contains a pointer to the 239 * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
213 * deallocated when the last user puts the context. 245 * deallocated when the last user puts the context.
214 */ 246 */
215struct ds_context { 247struct ds_context {
216 /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ 248 /* The DS configuration; goes into MSR_IA32_DS_AREA: */
217 unsigned char ds[MAX_SIZEOF_DS]; 249 unsigned char ds[MAX_SIZEOF_DS];
218 /* the owner of the BTS and PEBS configuration, respectively */ 250
219 struct bts_tracer *bts_master; 251 /* The owner of the BTS and PEBS configuration, respectively: */
220 struct pebs_tracer *pebs_master; 252 struct bts_tracer *bts_master;
221 /* use count */ 253 struct pebs_tracer *pebs_master;
222 unsigned long count;
223 /* a pointer to the context location inside the thread_struct
224 * or the per_cpu context array */
225 struct ds_context **this;
226 /* a pointer to the task owning this context, or NULL, if the
227 * context is owned by a cpu */
228 struct task_struct *task;
229};
230 254
231static DEFINE_PER_CPU(struct ds_context *, system_context_array); 255 /* Use count: */
256 unsigned long count;
232 257
233#define system_context per_cpu(system_context_array, smp_processor_id()) 258 /* Pointer to the context pointer field: */
259 struct ds_context **this;
260
261 /* The traced task; NULL for cpu tracing: */
262 struct task_struct *task;
263
264 /* The traced cpu; only valid if task is NULL: */
265 int cpu;
266};
234 267
268static DEFINE_PER_CPU(struct ds_context *, cpu_context);
235 269
236static inline struct ds_context *ds_get_context(struct task_struct *task) 270
271static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
237{ 272{
238 struct ds_context **p_context = 273 struct ds_context **p_context =
239 (task ? &task->thread.ds_ctx : &system_context); 274 (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
240 struct ds_context *context = NULL; 275 struct ds_context *context = NULL;
241 struct ds_context *new_context = NULL; 276 struct ds_context *new_context = NULL;
242 unsigned long irq;
243 277
244 /* Chances are small that we already have a context. */ 278 /* Chances are small that we already have a context. */
245 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); 279 new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
246 if (!new_context) 280 if (!new_context)
247 return NULL; 281 return NULL;
248 282
249 spin_lock_irqsave(&ds_lock, irq); 283 spin_lock_irq(&ds_lock);
250 284
251 context = *p_context; 285 context = *p_context;
252 if (!context) { 286 if (likely(!context)) {
253 context = new_context; 287 context = new_context;
254 288
255 context->this = p_context; 289 context->this = p_context;
256 context->task = task; 290 context->task = task;
291 context->cpu = cpu;
257 context->count = 0; 292 context->count = 0;
258 293
259 if (task)
260 set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
261
262 if (!task || (task == current))
263 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
264
265 *p_context = context; 294 *p_context = context;
266 } 295 }
267 296
268 context->count++; 297 context->count++;
269 298
270 spin_unlock_irqrestore(&ds_lock, irq); 299 spin_unlock_irq(&ds_lock);
271 300
272 if (context != new_context) 301 if (context != new_context)
273 kfree(new_context); 302 kfree(new_context);
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
275 return context; 304 return context;
276} 305}
277 306
278static inline void ds_put_context(struct ds_context *context) 307static void ds_put_context(struct ds_context *context)
279{ 308{
309 struct task_struct *task;
280 unsigned long irq; 310 unsigned long irq;
281 311
282 if (!context) 312 if (!context)
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
291 321
292 *(context->this) = NULL; 322 *(context->this) = NULL;
293 323
294 if (context->task) 324 task = context->task;
295 clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); 325
326 if (task)
327 clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
296 328
297 if (!context->task || (context->task == current)) 329 /*
298 wrmsrl(MSR_IA32_DS_AREA, 0); 330 * We leave the (now dangling) pointer to the DS configuration in
331 * the DS_AREA msr. This is as good or as bad as replacing it with
332 * NULL - the hardware would crash if we enabled tracing.
333 *
334 * This saves us some problems with having to write an msr on a
335 * different cpu while preventing others from doing the same for the
336 * next context for that same cpu.
337 */
299 338
300 spin_unlock_irqrestore(&ds_lock, irq); 339 spin_unlock_irqrestore(&ds_lock, irq);
301 340
341 /* The context might still be in use for context switching. */
342 if (task && (task != current))
343 wait_task_context_switch(task);
344
302 kfree(context); 345 kfree(context);
303} 346}
304 347
348static void ds_install_ds_area(struct ds_context *context)
349{
350 unsigned long ds;
351
352 ds = (unsigned long)context->ds;
353
354 /*
355 * There is a race between the bts master and the pebs master.
356 *
357 * The thread/cpu access is synchronized via get/put_cpu() for
358 * task tracing and via wrmsr_on_cpu for cpu tracing.
359 *
360 * If bts and pebs are collected for the same task or same cpu,
361 * the same confiuration is written twice.
362 */
363 if (context->task) {
364 get_cpu();
365 if (context->task == current)
366 wrmsrl(MSR_IA32_DS_AREA, ds);
367 set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
368 put_cpu();
369 } else
370 wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
371 (u32)((u64)ds), (u32)((u64)ds >> 32));
372}
305 373
306/* 374/*
307 * Call the tracer's callback on a buffer overflow. 375 * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
332 * The remainder of any partially written record is zeroed out. 400 * The remainder of any partially written record is zeroed out.
333 * 401 *
334 * context: the DS context 402 * context: the DS context
335 * qual: the buffer type 403 * qual: the buffer type
336 * record: the data to write 404 * record: the data to write
337 * size: the size of the data 405 * size: the size of the data
338 */ 406 */
339static int ds_write(struct ds_context *context, enum ds_qualifier qual, 407static int ds_write(struct ds_context *context, enum ds_qualifier qual,
340 const void *record, size_t size) 408 const void *record, size_t size)
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
349 unsigned long write_size, adj_write_size; 417 unsigned long write_size, adj_write_size;
350 418
351 /* 419 /*
352 * write as much as possible without producing an 420 * Write as much as possible without producing an
353 * overflow interrupt. 421 * overflow interrupt.
354 * 422 *
355 * interrupt_threshold must either be 423 * Interrupt_threshold must either be
356 * - bigger than absolute_maximum or 424 * - bigger than absolute_maximum or
357 * - point to a record between buffer_base and absolute_maximum 425 * - point to a record between buffer_base and absolute_maximum
358 * 426 *
359 * index points to a valid record. 427 * Index points to a valid record.
360 */ 428 */
361 base = ds_get(context->ds, qual, ds_buffer_base); 429 base = ds_get(context->ds, qual, ds_buffer_base);
362 index = ds_get(context->ds, qual, ds_index); 430 index = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
365 433
366 write_end = min(end, int_th); 434 write_end = min(end, int_th);
367 435
368 /* if we are already beyond the interrupt threshold, 436 /*
369 * we fill the entire buffer */ 437 * If we are already beyond the interrupt threshold,
438 * we fill the entire buffer.
439 */
370 if (write_end <= index) 440 if (write_end <= index)
371 write_end = end; 441 write_end = end;
372 442
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
383 adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; 453 adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
384 adj_write_size *= ds_cfg.sizeof_rec[qual]; 454 adj_write_size *= ds_cfg.sizeof_rec[qual];
385 455
386 /* zero out trailing bytes */ 456 /* Zero out trailing bytes. */
387 memset((char *)index + write_size, 0, 457 memset((char *)index + write_size, 0,
388 adj_write_size - write_size); 458 adj_write_size - write_size);
389 index += adj_write_size; 459 index += adj_write_size;
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
410 * Later architectures use 64bit pointers throughout, whereas earlier 480 * Later architectures use 64bit pointers throughout, whereas earlier
411 * architectures use 32bit pointers in 32bit mode. 481 * architectures use 32bit pointers in 32bit mode.
412 * 482 *
413 * We compute the base address for the first 8 fields based on: 483 * We compute the base address for the fields based on:
414 * - the field size stored in the DS configuration 484 * - the field size stored in the DS configuration
415 * - the relative field position 485 * - the relative field position
416 * 486 *
@@ -431,23 +501,23 @@ enum bts_field {
431 bts_to, 501 bts_to,
432 bts_flags, 502 bts_flags,
433 503
434 bts_qual = bts_from, 504 bts_qual = bts_from,
435 bts_jiffies = bts_to, 505 bts_clock = bts_to,
436 bts_pid = bts_flags, 506 bts_pid = bts_flags,
437 507
438 bts_qual_mask = (bts_qual_max - 1), 508 bts_qual_mask = (bts_qual_max - 1),
439 bts_escape = ((unsigned long)-1 & ~bts_qual_mask) 509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
440}; 510};
441 511
442static inline unsigned long bts_get(const char *base, enum bts_field field) 512static inline unsigned long bts_get(const char *base, enum bts_field field)
443{ 513{
444 base += (ds_cfg.sizeof_field * field); 514 base += (ds_cfg.sizeof_ptr_field * field);
445 return *(unsigned long *)base; 515 return *(unsigned long *)base;
446} 516}
447 517
448static inline void bts_set(char *base, enum bts_field field, unsigned long val) 518static inline void bts_set(char *base, enum bts_field field, unsigned long val)
449{ 519{
450 base += (ds_cfg.sizeof_field * field);; 520 base += (ds_cfg.sizeof_ptr_field * field);;
451 (*(unsigned long *)base) = val; 521 (*(unsigned long *)base) = val;
452} 522}
453 523
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
463 * 533 *
464 * return: bytes read/written on success; -Eerrno, otherwise 534 * return: bytes read/written on success; -Eerrno, otherwise
465 */ 535 */
466static int bts_read(struct bts_tracer *tracer, const void *at, 536static int
467 struct bts_struct *out) 537bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
468{ 538{
469 if (!tracer) 539 if (!tracer)
470 return -EINVAL; 540 return -EINVAL;
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
478 memset(out, 0, sizeof(*out)); 548 memset(out, 0, sizeof(*out));
479 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { 549 if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
480 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); 550 out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
481 out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); 551 out->variant.event.clock = bts_get(at, bts_clock);
482 out->variant.timestamp.pid = bts_get(at, bts_pid); 552 out->variant.event.pid = bts_get(at, bts_pid);
483 } else { 553 } else {
484 out->qualifier = bts_branch; 554 out->qualifier = bts_branch;
485 out->variant.lbr.from = bts_get(at, bts_from); 555 out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
516 case bts_task_arrives: 586 case bts_task_arrives:
517 case bts_task_departs: 587 case bts_task_departs:
518 bts_set(raw, bts_qual, (bts_escape | in->qualifier)); 588 bts_set(raw, bts_qual, (bts_escape | in->qualifier));
519 bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); 589 bts_set(raw, bts_clock, in->variant.event.clock);
520 bts_set(raw, bts_pid, in->variant.timestamp.pid); 590 bts_set(raw, bts_pid, in->variant.event.pid);
521 break; 591 break;
522 default: 592 default:
523 return -EINVAL; 593 return -EINVAL;
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
555 unsigned int flags) { 625 unsigned int flags) {
556 unsigned long buffer, adj; 626 unsigned long buffer, adj;
557 627
558 /* adjust the buffer address and size to meet alignment 628 /*
629 * Adjust the buffer address and size to meet alignment
559 * constraints: 630 * constraints:
560 * - buffer is double-word aligned 631 * - buffer is double-word aligned
561 * - size is multiple of record size 632 * - size is multiple of record size
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
577 trace->begin = (void *)buffer; 648 trace->begin = (void *)buffer;
578 trace->top = trace->begin; 649 trace->top = trace->begin;
579 trace->end = (void *)(buffer + size); 650 trace->end = (void *)(buffer + size);
580 /* The value for 'no threshold' is -1, which will set the 651 /*
652 * The value for 'no threshold' is -1, which will set the
581 * threshold outside of the buffer, just like we want it. 653 * threshold outside of the buffer, just like we want it.
582 */ 654 */
655 ith *= ds_cfg.sizeof_rec[qual];
583 trace->ith = (void *)(buffer + size - ith); 656 trace->ith = (void *)(buffer + size - ith);
584 657
585 trace->flags = flags; 658 trace->flags = flags;
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
588 661
589static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, 662static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
590 enum ds_qualifier qual, struct task_struct *task, 663 enum ds_qualifier qual, struct task_struct *task,
591 void *base, size_t size, size_t th, unsigned int flags) 664 int cpu, void *base, size_t size, size_t th)
592{ 665{
593 struct ds_context *context; 666 struct ds_context *context;
594 int error; 667 int error;
668 size_t req_size;
669
670 error = -EOPNOTSUPP;
671 if (!ds_cfg.sizeof_rec[qual])
672 goto out;
595 673
596 error = -EINVAL; 674 error = -EINVAL;
597 if (!base) 675 if (!base)
598 goto out; 676 goto out;
599 677
600 /* we require some space to do alignment adjustments below */ 678 req_size = ds_cfg.sizeof_rec[qual];
679 /* We might need space for alignment adjustments. */
680 if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
681 req_size += DS_ALIGNMENT;
682
601 error = -EINVAL; 683 error = -EINVAL;
602 if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) 684 if (size < req_size)
603 goto out; 685 goto out;
604 686
605 if (th != (size_t)-1) { 687 if (th != (size_t)-1) {
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
614 tracer->size = size; 696 tracer->size = size;
615 697
616 error = -ENOMEM; 698 error = -ENOMEM;
617 context = ds_get_context(task); 699 context = ds_get_context(task, cpu);
618 if (!context) 700 if (!context)
619 goto out; 701 goto out;
620 tracer->context = context; 702 tracer->context = context;
621 703
622 ds_init_ds_trace(trace, qual, base, size, th, flags); 704 /*
705 * Defer any tracer-specific initialization work for the context until
706 * context ownership has been clarified.
707 */
623 708
624 error = 0; 709 error = 0;
625 out: 710 out:
626 return error; 711 return error;
627} 712}
628 713
629struct bts_tracer *ds_request_bts(struct task_struct *task, 714static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
630 void *base, size_t size, 715 void *base, size_t size,
631 bts_ovfl_callback_t ovfl, size_t th, 716 bts_ovfl_callback_t ovfl, size_t th,
632 unsigned int flags) 717 unsigned int flags)
633{ 718{
634 struct bts_tracer *tracer; 719 struct bts_tracer *tracer;
635 unsigned long irq;
636 int error; 720 int error;
637 721
722 /* Buffer overflow notification is not yet implemented. */
638 error = -EOPNOTSUPP; 723 error = -EOPNOTSUPP;
639 if (!ds_cfg.ctl[dsf_bts]) 724 if (ovfl)
640 goto out; 725 goto out;
641 726
642 /* buffer overflow notification is not yet implemented */ 727 error = get_tracer(task);
643 error = -EOPNOTSUPP; 728 if (error < 0)
644 if (ovfl)
645 goto out; 729 goto out;
646 730
647 error = -ENOMEM; 731 error = -ENOMEM;
648 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); 732 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
649 if (!tracer) 733 if (!tracer)
650 goto out; 734 goto out_put_tracer;
651 tracer->ovfl = ovfl; 735 tracer->ovfl = ovfl;
652 736
737 /* Do some more error checking and acquire a tracing context. */
653 error = ds_request(&tracer->ds, &tracer->trace.ds, 738 error = ds_request(&tracer->ds, &tracer->trace.ds,
654 ds_bts, task, base, size, th, flags); 739 ds_bts, task, cpu, base, size, th);
655 if (error < 0) 740 if (error < 0)
656 goto out_tracer; 741 goto out_tracer;
657 742
658 743 /* Claim the bts part of the tracing context we acquired above. */
659 spin_lock_irqsave(&ds_lock, irq); 744 spin_lock_irq(&ds_lock);
660
661 error = -EPERM;
662 if (!check_tracer(task))
663 goto out_unlock;
664 get_tracer(task);
665 745
666 error = -EPERM; 746 error = -EPERM;
667 if (tracer->ds.context->bts_master) 747 if (tracer->ds.context->bts_master)
668 goto out_put_tracer; 748 goto out_unlock;
669 tracer->ds.context->bts_master = tracer; 749 tracer->ds.context->bts_master = tracer;
670 750
671 spin_unlock_irqrestore(&ds_lock, irq); 751 spin_unlock_irq(&ds_lock);
672 752
753 /*
754 * Now that we own the bts part of the context, let's complete the
755 * initialization for that part.
756 */
757 ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
758 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
759 ds_install_ds_area(tracer->ds.context);
673 760
674 tracer->trace.read = bts_read; 761 tracer->trace.read = bts_read;
675 tracer->trace.write = bts_write; 762 tracer->trace.write = bts_write;
676 763
677 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); 764 /* Start tracing. */
678 ds_resume_bts(tracer); 765 ds_resume_bts(tracer);
679 766
680 return tracer; 767 return tracer;
681 768
682 out_put_tracer:
683 put_tracer(task);
684 out_unlock: 769 out_unlock:
685 spin_unlock_irqrestore(&ds_lock, irq); 770 spin_unlock_irq(&ds_lock);
686 ds_put_context(tracer->ds.context); 771 ds_put_context(tracer->ds.context);
687 out_tracer: 772 out_tracer:
688 kfree(tracer); 773 kfree(tracer);
774 out_put_tracer:
775 put_tracer(task);
689 out: 776 out:
690 return ERR_PTR(error); 777 return ERR_PTR(error);
691} 778}
692 779
693struct pebs_tracer *ds_request_pebs(struct task_struct *task, 780struct bts_tracer *ds_request_bts_task(struct task_struct *task,
694 void *base, size_t size, 781 void *base, size_t size,
695 pebs_ovfl_callback_t ovfl, size_t th, 782 bts_ovfl_callback_t ovfl,
696 unsigned int flags) 783 size_t th, unsigned int flags)
784{
785 return ds_request_bts(task, 0, base, size, ovfl, th, flags);
786}
787
788struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
789 bts_ovfl_callback_t ovfl,
790 size_t th, unsigned int flags)
791{
792 return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
793}
794
795static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
796 void *base, size_t size,
797 pebs_ovfl_callback_t ovfl, size_t th,
798 unsigned int flags)
697{ 799{
698 struct pebs_tracer *tracer; 800 struct pebs_tracer *tracer;
699 unsigned long irq;
700 int error; 801 int error;
701 802
702 /* buffer overflow notification is not yet implemented */ 803 /* Buffer overflow notification is not yet implemented. */
703 error = -EOPNOTSUPP; 804 error = -EOPNOTSUPP;
704 if (ovfl) 805 if (ovfl)
705 goto out; 806 goto out;
706 807
808 error = get_tracer(task);
809 if (error < 0)
810 goto out;
811
707 error = -ENOMEM; 812 error = -ENOMEM;
708 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); 813 tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
709 if (!tracer) 814 if (!tracer)
710 goto out; 815 goto out_put_tracer;
711 tracer->ovfl = ovfl; 816 tracer->ovfl = ovfl;
712 817
818 /* Do some more error checking and acquire a tracing context. */
713 error = ds_request(&tracer->ds, &tracer->trace.ds, 819 error = ds_request(&tracer->ds, &tracer->trace.ds,
714 ds_pebs, task, base, size, th, flags); 820 ds_pebs, task, cpu, base, size, th);
715 if (error < 0) 821 if (error < 0)
716 goto out_tracer; 822 goto out_tracer;
717 823
718 spin_lock_irqsave(&ds_lock, irq); 824 /* Claim the pebs part of the tracing context we acquired above. */
719 825 spin_lock_irq(&ds_lock);
720 error = -EPERM;
721 if (!check_tracer(task))
722 goto out_unlock;
723 get_tracer(task);
724 826
725 error = -EPERM; 827 error = -EPERM;
726 if (tracer->ds.context->pebs_master) 828 if (tracer->ds.context->pebs_master)
727 goto out_put_tracer; 829 goto out_unlock;
728 tracer->ds.context->pebs_master = tracer; 830 tracer->ds.context->pebs_master = tracer;
729 831
730 spin_unlock_irqrestore(&ds_lock, irq); 832 spin_unlock_irq(&ds_lock);
731 833
834 /*
835 * Now that we own the pebs part of the context, let's complete the
836 * initialization for that part.
837 */
838 ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
732 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); 839 ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
840 ds_install_ds_area(tracer->ds.context);
841
842 /* Start tracing. */
733 ds_resume_pebs(tracer); 843 ds_resume_pebs(tracer);
734 844
735 return tracer; 845 return tracer;
736 846
737 out_put_tracer:
738 put_tracer(task);
739 out_unlock: 847 out_unlock:
740 spin_unlock_irqrestore(&ds_lock, irq); 848 spin_unlock_irq(&ds_lock);
741 ds_put_context(tracer->ds.context); 849 ds_put_context(tracer->ds.context);
742 out_tracer: 850 out_tracer:
743 kfree(tracer); 851 kfree(tracer);
852 out_put_tracer:
853 put_tracer(task);
744 out: 854 out:
745 return ERR_PTR(error); 855 return ERR_PTR(error);
746} 856}
747 857
748void ds_release_bts(struct bts_tracer *tracer) 858struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
859 void *base, size_t size,
860 pebs_ovfl_callback_t ovfl,
861 size_t th, unsigned int flags)
749{ 862{
750 if (!tracer) 863 return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
751 return; 864}
752 865
753 ds_suspend_bts(tracer); 866struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
867 pebs_ovfl_callback_t ovfl,
868 size_t th, unsigned int flags)
869{
870 return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
871}
872
873static void ds_free_bts(struct bts_tracer *tracer)
874{
875 struct task_struct *task;
876
877 task = tracer->ds.context->task;
754 878
755 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); 879 WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
756 tracer->ds.context->bts_master = NULL; 880 tracer->ds.context->bts_master = NULL;
757 881
758 put_tracer(tracer->ds.context->task); 882 /* Make sure tracing stopped and the tracer is not in use. */
883 if (task && (task != current))
884 wait_task_context_switch(task);
885
759 ds_put_context(tracer->ds.context); 886 ds_put_context(tracer->ds.context);
887 put_tracer(task);
760 888
761 kfree(tracer); 889 kfree(tracer);
762} 890}
763 891
892void ds_release_bts(struct bts_tracer *tracer)
893{
894 might_sleep();
895
896 if (!tracer)
897 return;
898
899 ds_suspend_bts(tracer);
900 ds_free_bts(tracer);
901}
902
903int ds_release_bts_noirq(struct bts_tracer *tracer)
904{
905 struct task_struct *task;
906 unsigned long irq;
907 int error;
908
909 if (!tracer)
910 return 0;
911
912 task = tracer->ds.context->task;
913
914 local_irq_save(irq);
915
916 error = -EPERM;
917 if (!task &&
918 (tracer->ds.context->cpu != smp_processor_id()))
919 goto out;
920
921 error = -EPERM;
922 if (task && (task != current))
923 goto out;
924
925 ds_suspend_bts_noirq(tracer);
926 ds_free_bts(tracer);
927
928 error = 0;
929 out:
930 local_irq_restore(irq);
931 return error;
932}
933
934static void update_task_debugctlmsr(struct task_struct *task,
935 unsigned long debugctlmsr)
936{
937 task->thread.debugctlmsr = debugctlmsr;
938
939 get_cpu();
940 if (task == current)
941 update_debugctlmsr(debugctlmsr);
942 put_cpu();
943}
944
764void ds_suspend_bts(struct bts_tracer *tracer) 945void ds_suspend_bts(struct bts_tracer *tracer)
765{ 946{
766 struct task_struct *task; 947 struct task_struct *task;
948 unsigned long debugctlmsr;
949 int cpu;
767 950
768 if (!tracer) 951 if (!tracer)
769 return; 952 return;
770 953
954 tracer->flags = 0;
955
771 task = tracer->ds.context->task; 956 task = tracer->ds.context->task;
957 cpu = tracer->ds.context->cpu;
772 958
773 if (!task || (task == current)) 959 WARN_ON(!task && irqs_disabled());
774 update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
775 960
776 if (task) { 961 debugctlmsr = (task ?
777 task->thread.debugctlmsr &= ~BTS_CONTROL; 962 task->thread.debugctlmsr :
963 get_debugctlmsr_on_cpu(cpu));
964 debugctlmsr &= ~BTS_CONTROL;
778 965
779 if (!task->thread.debugctlmsr) 966 if (task)
780 clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); 967 update_task_debugctlmsr(task, debugctlmsr);
781 } 968 else
969 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
782} 970}
783 971
784void ds_resume_bts(struct bts_tracer *tracer) 972int ds_suspend_bts_noirq(struct bts_tracer *tracer)
785{ 973{
786 struct task_struct *task; 974 struct task_struct *task;
787 unsigned long control; 975 unsigned long debugctlmsr, irq;
976 int cpu, error = 0;
788 977
789 if (!tracer) 978 if (!tracer)
790 return; 979 return 0;
980
981 tracer->flags = 0;
791 982
792 task = tracer->ds.context->task; 983 task = tracer->ds.context->task;
984 cpu = tracer->ds.context->cpu;
985
986 local_irq_save(irq);
987
988 error = -EPERM;
989 if (!task && (cpu != smp_processor_id()))
990 goto out;
991
992 debugctlmsr = (task ?
993 task->thread.debugctlmsr :
994 get_debugctlmsr());
995 debugctlmsr &= ~BTS_CONTROL;
996
997 if (task)
998 update_task_debugctlmsr(task, debugctlmsr);
999 else
1000 update_debugctlmsr(debugctlmsr);
1001
1002 error = 0;
1003 out:
1004 local_irq_restore(irq);
1005 return error;
1006}
1007
1008static unsigned long ds_bts_control(struct bts_tracer *tracer)
1009{
1010 unsigned long control;
793 1011
794 control = ds_cfg.ctl[dsf_bts]; 1012 control = ds_cfg.ctl[dsf_bts];
795 if (!(tracer->trace.ds.flags & BTS_KERNEL)) 1013 if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
797 if (!(tracer->trace.ds.flags & BTS_USER)) 1015 if (!(tracer->trace.ds.flags & BTS_USER))
798 control |= ds_cfg.ctl[dsf_bts_user]; 1016 control |= ds_cfg.ctl[dsf_bts_user];
799 1017
800 if (task) { 1018 return control;
801 task->thread.debugctlmsr |= control;
802 set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
803 }
804
805 if (!task || (task == current))
806 update_debugctlmsr(get_debugctlmsr() | control);
807} 1019}
808 1020
809void ds_release_pebs(struct pebs_tracer *tracer) 1021void ds_resume_bts(struct bts_tracer *tracer)
810{ 1022{
1023 struct task_struct *task;
1024 unsigned long debugctlmsr;
1025 int cpu;
1026
811 if (!tracer) 1027 if (!tracer)
812 return; 1028 return;
813 1029
814 ds_suspend_pebs(tracer); 1030 tracer->flags = tracer->trace.ds.flags;
1031
1032 task = tracer->ds.context->task;
1033 cpu = tracer->ds.context->cpu;
1034
1035 WARN_ON(!task && irqs_disabled());
1036
1037 debugctlmsr = (task ?
1038 task->thread.debugctlmsr :
1039 get_debugctlmsr_on_cpu(cpu));
1040 debugctlmsr |= ds_bts_control(tracer);
1041
1042 if (task)
1043 update_task_debugctlmsr(task, debugctlmsr);
1044 else
1045 update_debugctlmsr_on_cpu(cpu, debugctlmsr);
1046}
1047
1048int ds_resume_bts_noirq(struct bts_tracer *tracer)
1049{
1050 struct task_struct *task;
1051 unsigned long debugctlmsr, irq;
1052 int cpu, error = 0;
1053
1054 if (!tracer)
1055 return 0;
1056
1057 tracer->flags = tracer->trace.ds.flags;
1058
1059 task = tracer->ds.context->task;
1060 cpu = tracer->ds.context->cpu;
1061
1062 local_irq_save(irq);
1063
1064 error = -EPERM;
1065 if (!task && (cpu != smp_processor_id()))
1066 goto out;
1067
1068 debugctlmsr = (task ?
1069 task->thread.debugctlmsr :
1070 get_debugctlmsr());
1071 debugctlmsr |= ds_bts_control(tracer);
1072
1073 if (task)
1074 update_task_debugctlmsr(task, debugctlmsr);
1075 else
1076 update_debugctlmsr(debugctlmsr);
1077
1078 error = 0;
1079 out:
1080 local_irq_restore(irq);
1081 return error;
1082}
1083
1084static void ds_free_pebs(struct pebs_tracer *tracer)
1085{
1086 struct task_struct *task;
1087
1088 task = tracer->ds.context->task;
815 1089
816 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); 1090 WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
817 tracer->ds.context->pebs_master = NULL; 1091 tracer->ds.context->pebs_master = NULL;
818 1092
819 put_tracer(tracer->ds.context->task);
820 ds_put_context(tracer->ds.context); 1093 ds_put_context(tracer->ds.context);
1094 put_tracer(task);
821 1095
822 kfree(tracer); 1096 kfree(tracer);
823} 1097}
824 1098
1099void ds_release_pebs(struct pebs_tracer *tracer)
1100{
1101 might_sleep();
1102
1103 if (!tracer)
1104 return;
1105
1106 ds_suspend_pebs(tracer);
1107 ds_free_pebs(tracer);
1108}
1109
1110int ds_release_pebs_noirq(struct pebs_tracer *tracer)
1111{
1112 struct task_struct *task;
1113 unsigned long irq;
1114 int error;
1115
1116 if (!tracer)
1117 return 0;
1118
1119 task = tracer->ds.context->task;
1120
1121 local_irq_save(irq);
1122
1123 error = -EPERM;
1124 if (!task &&
1125 (tracer->ds.context->cpu != smp_processor_id()))
1126 goto out;
1127
1128 error = -EPERM;
1129 if (task && (task != current))
1130 goto out;
1131
1132 ds_suspend_pebs_noirq(tracer);
1133 ds_free_pebs(tracer);
1134
1135 error = 0;
1136 out:
1137 local_irq_restore(irq);
1138 return error;
1139}
1140
825void ds_suspend_pebs(struct pebs_tracer *tracer) 1141void ds_suspend_pebs(struct pebs_tracer *tracer)
826{ 1142{
827 1143
828} 1144}
829 1145
1146int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
1147{
1148 return 0;
1149}
1150
830void ds_resume_pebs(struct pebs_tracer *tracer) 1151void ds_resume_pebs(struct pebs_tracer *tracer)
831{ 1152{
832 1153
833} 1154}
834 1155
1156int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
1157{
1158 return 0;
1159}
1160
835const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) 1161const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
836{ 1162{
837 if (!tracer) 1163 if (!tracer)
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
847 return NULL; 1173 return NULL;
848 1174
849 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); 1175 ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
850 tracer->trace.reset_value = 1176
851 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); 1177 tracer->trace.counters = ds_cfg.nr_counter_reset;
1178 memcpy(tracer->trace.counter_reset,
1179 tracer->ds.context->ds +
1180 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
1181 ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
852 1182
853 return &tracer->trace; 1183 return &tracer->trace;
854} 1184}
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
873 1203
874 tracer->trace.ds.top = tracer->trace.ds.begin; 1204 tracer->trace.ds.top = tracer->trace.ds.begin;
875 1205
876 ds_set(tracer->ds.context->ds, ds_bts, ds_index, 1206 ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
877 (unsigned long)tracer->trace.ds.top); 1207 (unsigned long)tracer->trace.ds.top);
878 1208
879 return 0; 1209 return 0;
880} 1210}
881 1211
882int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) 1212int ds_set_pebs_reset(struct pebs_tracer *tracer,
1213 unsigned int counter, u64 value)
883{ 1214{
884 if (!tracer) 1215 if (!tracer)
885 return -EINVAL; 1216 return -EINVAL;
886 1217
887 *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; 1218 if (ds_cfg.nr_counter_reset < counter)
1219 return -EINVAL;
1220
1221 *(u64 *)(tracer->ds.context->ds +
1222 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
1223 (counter * PEBS_RESET_FIELD_SIZE)) = value;
888 1224
889 return 0; 1225 return 0;
890} 1226}
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
894 .ctl[dsf_bts] = (1 << 2) | (1 << 3), 1230 .ctl[dsf_bts] = (1 << 2) | (1 << 3),
895 .ctl[dsf_bts_kernel] = (1 << 5), 1231 .ctl[dsf_bts_kernel] = (1 << 5),
896 .ctl[dsf_bts_user] = (1 << 6), 1232 .ctl[dsf_bts_user] = (1 << 6),
897 1233 .nr_counter_reset = 1,
898 .sizeof_field = sizeof(long),
899 .sizeof_rec[ds_bts] = sizeof(long) * 3,
900#ifdef __i386__
901 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
902#else
903 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
904#endif
905}; 1234};
906static const struct ds_configuration ds_cfg_pentium_m = { 1235static const struct ds_configuration ds_cfg_pentium_m = {
907 .name = "Pentium M", 1236 .name = "Pentium M",
908 .ctl[dsf_bts] = (1 << 6) | (1 << 7), 1237 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
909 1238 .nr_counter_reset = 1,
910 .sizeof_field = sizeof(long),
911 .sizeof_rec[ds_bts] = sizeof(long) * 3,
912#ifdef __i386__
913 .sizeof_rec[ds_pebs] = sizeof(long) * 10,
914#else
915 .sizeof_rec[ds_pebs] = sizeof(long) * 18,
916#endif
917}; 1239};
918static const struct ds_configuration ds_cfg_core2_atom = { 1240static const struct ds_configuration ds_cfg_core2_atom = {
919 .name = "Core 2/Atom", 1241 .name = "Core 2/Atom",
920 .ctl[dsf_bts] = (1 << 6) | (1 << 7), 1242 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
921 .ctl[dsf_bts_kernel] = (1 << 9), 1243 .ctl[dsf_bts_kernel] = (1 << 9),
922 .ctl[dsf_bts_user] = (1 << 10), 1244 .ctl[dsf_bts_user] = (1 << 10),
923 1245 .nr_counter_reset = 1,
924 .sizeof_field = 8, 1246};
925 .sizeof_rec[ds_bts] = 8 * 3, 1247static const struct ds_configuration ds_cfg_core_i7 = {
926 .sizeof_rec[ds_pebs] = 8 * 18, 1248 .name = "Core i7",
1249 .ctl[dsf_bts] = (1 << 6) | (1 << 7),
1250 .ctl[dsf_bts_kernel] = (1 << 9),
1251 .ctl[dsf_bts_user] = (1 << 10),
1252 .nr_counter_reset = 4,
927}; 1253};
928 1254
929static void 1255static void
930ds_configure(const struct ds_configuration *cfg) 1256ds_configure(const struct ds_configuration *cfg,
1257 struct cpuinfo_x86 *cpu)
931{ 1258{
1259 unsigned long nr_pebs_fields = 0;
1260
1261 printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
1262
1263#ifdef __i386__
1264 nr_pebs_fields = 10;
1265#else
1266 nr_pebs_fields = 18;
1267#endif
1268
1269 /*
1270 * Starting with version 2, architectural performance
1271 * monitoring supports a format specifier.
1272 */
1273 if ((cpuid_eax(0xa) & 0xff) > 1) {
1274 unsigned long perf_capabilities, format;
1275
1276 rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
1277
1278 format = (perf_capabilities >> 8) & 0xf;
1279
1280 switch (format) {
1281 case 0:
1282 nr_pebs_fields = 18;
1283 break;
1284 case 1:
1285 nr_pebs_fields = 22;
1286 break;
1287 default:
1288 printk(KERN_INFO
1289 "[ds] unknown PEBS format: %lu\n", format);
1290 nr_pebs_fields = 0;
1291 break;
1292 }
1293 }
1294
932 memset(&ds_cfg, 0, sizeof(ds_cfg)); 1295 memset(&ds_cfg, 0, sizeof(ds_cfg));
933 ds_cfg = *cfg; 1296 ds_cfg = *cfg;
934 1297
935 printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); 1298 ds_cfg.sizeof_ptr_field =
1299 (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
1300
1301 ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
1302 ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
936 1303
937 if (!cpu_has_bts) { 1304 if (!cpu_has(cpu, X86_FEATURE_BTS)) {
938 ds_cfg.ctl[dsf_bts] = 0; 1305 ds_cfg.sizeof_rec[ds_bts] = 0;
939 printk(KERN_INFO "[ds] bts not available\n"); 1306 printk(KERN_INFO "[ds] bts not available\n");
940 } 1307 }
941 if (!cpu_has_pebs) 1308 if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
1309 ds_cfg.sizeof_rec[ds_pebs] = 0;
942 printk(KERN_INFO "[ds] pebs not available\n"); 1310 printk(KERN_INFO "[ds] pebs not available\n");
1311 }
1312
1313 printk(KERN_INFO "[ds] sizes: address: %u bit, ",
1314 8 * ds_cfg.sizeof_ptr_field);
1315 printk("bts/pebs record: %u/%u bytes\n",
1316 ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
943 1317
944 WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); 1318 WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
945} 1319}
946 1320
947void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) 1321void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
948{ 1322{
1323 /* Only configure the first cpu. Others are identical. */
1324 if (ds_cfg.name)
1325 return;
1326
949 switch (c->x86) { 1327 switch (c->x86) {
950 case 0x6: 1328 case 0x6:
951 switch (c->x86_model) { 1329 switch (c->x86_model) {
952 case 0x9: 1330 case 0x9:
953 case 0xd: /* Pentium M */ 1331 case 0xd: /* Pentium M */
954 ds_configure(&ds_cfg_pentium_m); 1332 ds_configure(&ds_cfg_pentium_m, c);
955 break; 1333 break;
956 case 0xf: 1334 case 0xf:
957 case 0x17: /* Core2 */ 1335 case 0x17: /* Core2 */
958 case 0x1c: /* Atom */ 1336 case 0x1c: /* Atom */
959 ds_configure(&ds_cfg_core2_atom); 1337 ds_configure(&ds_cfg_core2_atom, c);
1338 break;
1339 case 0x1a: /* Core i7 */
1340 ds_configure(&ds_cfg_core_i7, c);
960 break; 1341 break;
961 case 0x1a: /* i7 */
962 default: 1342 default:
963 /* sorry, don't know about them */ 1343 /* Sorry, don't know about them. */
964 break; 1344 break;
965 } 1345 }
966 break; 1346 break;
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
969 case 0x0: 1349 case 0x0:
970 case 0x1: 1350 case 0x1:
971 case 0x2: /* Netburst */ 1351 case 0x2: /* Netburst */
972 ds_configure(&ds_cfg_netburst); 1352 ds_configure(&ds_cfg_netburst, c);
973 break; 1353 break;
974 default: 1354 default:
975 /* sorry, don't know about them */ 1355 /* Sorry, don't know about them. */
976 break; 1356 break;
977 } 1357 }
978 break; 1358 break;
979 default: 1359 default:
980 /* sorry, don't know about them */ 1360 /* Sorry, don't know about them. */
981 break; 1361 break;
982 } 1362 }
983} 1363}
984 1364
1365static inline void ds_take_timestamp(struct ds_context *context,
1366 enum bts_qualifier qualifier,
1367 struct task_struct *task)
1368{
1369 struct bts_tracer *tracer = context->bts_master;
1370 struct bts_struct ts;
1371
1372 /* Prevent compilers from reading the tracer pointer twice. */
1373 barrier();
1374
1375 if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
1376 return;
1377
1378 memset(&ts, 0, sizeof(ts));
1379 ts.qualifier = qualifier;
1380 ts.variant.event.clock = trace_clock_global();
1381 ts.variant.event.pid = task->pid;
1382
1383 bts_write(tracer, &ts);
1384}
1385
985/* 1386/*
986 * Change the DS configuration from tracing prev to tracing next. 1387 * Change the DS configuration from tracing prev to tracing next.
987 */ 1388 */
988void ds_switch_to(struct task_struct *prev, struct task_struct *next) 1389void ds_switch_to(struct task_struct *prev, struct task_struct *next)
989{ 1390{
990 struct ds_context *prev_ctx = prev->thread.ds_ctx; 1391 struct ds_context *prev_ctx = prev->thread.ds_ctx;
991 struct ds_context *next_ctx = next->thread.ds_ctx; 1392 struct ds_context *next_ctx = next->thread.ds_ctx;
1393 unsigned long debugctlmsr = next->thread.debugctlmsr;
1394
1395 /* Make sure all data is read before we start. */
1396 barrier();
992 1397
993 if (prev_ctx) { 1398 if (prev_ctx) {
994 update_debugctlmsr(0); 1399 update_debugctlmsr(0);
995 1400
996 if (prev_ctx->bts_master && 1401 ds_take_timestamp(prev_ctx, bts_task_departs, prev);
997 (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
998 struct bts_struct ts = {
999 .qualifier = bts_task_departs,
1000 .variant.timestamp.jiffies = jiffies_64,
1001 .variant.timestamp.pid = prev->pid
1002 };
1003 bts_write(prev_ctx->bts_master, &ts);
1004 }
1005 } 1402 }
1006 1403
1007 if (next_ctx) { 1404 if (next_ctx) {
1008 if (next_ctx->bts_master && 1405 ds_take_timestamp(next_ctx, bts_task_arrives, next);
1009 (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
1010 struct bts_struct ts = {
1011 .qualifier = bts_task_arrives,
1012 .variant.timestamp.jiffies = jiffies_64,
1013 .variant.timestamp.pid = next->pid
1014 };
1015 bts_write(next_ctx->bts_master, &ts);
1016 }
1017 1406
1018 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); 1407 wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
1019 } 1408 }
1020 1409
1021 update_debugctlmsr(next->thread.debugctlmsr); 1410 update_debugctlmsr(debugctlmsr);
1022} 1411}
1023 1412
1024void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) 1413static __init int ds_selftest(void)
1025{ 1414{
1026 clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); 1415 if (ds_cfg.sizeof_rec[ds_bts]) {
1027 tsk->thread.ds_ctx = NULL; 1416 int error;
1028}
1029 1417
1030void ds_exit_thread(struct task_struct *tsk) 1418 error = ds_selftest_bts();
1031{ 1419 if (error) {
1420 WARN(1, "[ds] selftest failed. disabling bts.\n");
1421 ds_cfg.sizeof_rec[ds_bts] = 0;
1422 }
1423 }
1424
1425 if (ds_cfg.sizeof_rec[ds_pebs]) {
1426 int error;
1427
1428 error = ds_selftest_pebs();
1429 if (error) {
1430 WARN(1, "[ds] selftest failed. disabling pebs.\n");
1431 ds_cfg.sizeof_rec[ds_pebs] = 0;
1432 }
1433 }
1434
1435 return 0;
1032} 1436}
1437device_initcall(ds_selftest);