Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r--   arch/x86/kernel/ds.c   1437
1 files changed, 0 insertions, 1437 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
deleted file mode 100644
index 1c47390dd0e5..000000000000
--- a/arch/x86/kernel/ds.c
+++ /dev/null
@@ -1,1437 +0,0 @@
/*
 * Debug Store support
 *
 * This provides a low-level interface to the hardware's Debug Store
 * feature that is used for branch trace store (BTS) and
 * precise-event based sampling (PEBS).
 *
 * It manages:
 * - DS and BTS hardware configuration
 * - buffer overflow handling (to be done)
 * - buffer access
 *
 * It does not do:
 * - security checking (is the caller allowed to trace the task)
 * - buffer allocation (memory accounting)
 *
 *
 * Copyright (C) 2007-2009 Intel Corporation.
 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
 */

#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/trace_clock.h>

#include <asm/ds.h>

#include "ds_selftest.h"

/*
 * The configuration for a particular DS hardware implementation:
 */
struct ds_configuration {
        /* The name of the configuration: */
        const char *name;

        /* The size of pointer-typed fields in DS, BTS, and PEBS: */
        unsigned char sizeof_ptr_field;

        /* The size of a BTS/PEBS record in bytes: */
        unsigned char sizeof_rec[2];

        /* The number of pebs counter reset values in the DS structure. */
        unsigned char nr_counter_reset;

        /* Control bit-masks indexed by enum ds_feature: */
        unsigned long ctl[dsf_ctl_max];
};
static struct ds_configuration ds_cfg __read_mostly;


/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS		0x80

/* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS		(3 * 8)

/* BTS and PEBS buffer alignment: */
#define DS_ALIGNMENT		(1 << 3)

/* Number of buffer pointers in DS: */
#define NUM_DS_PTR_FIELDS	8

/* Size of a pebs reset value in DS: */
#define PEBS_RESET_FIELD_SIZE	8

/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL \
        ( ds_cfg.ctl[dsf_bts] | \
          ds_cfg.ctl[dsf_bts_kernel] | \
          ds_cfg.ctl[dsf_bts_user] | \
          ds_cfg.ctl[dsf_bts_overflow] )

/*
 * A BTS or PEBS tracer.
 *
 * This holds the configuration of the tracer and serves as a handle
 * to identify tracers.
 */
struct ds_tracer {
        /* The DS context (partially) owned by this tracer. */
        struct ds_context *context;
        /* The buffer provided on ds_request() and its size in bytes. */
        void *buffer;
        size_t size;
};

struct bts_tracer {
        /* The common DS part: */
        struct ds_tracer ds;

        /* The trace including the DS configuration: */
        struct bts_trace trace;

        /* Buffer overflow notification function: */
        bts_ovfl_callback_t ovfl;

        /* Active flags affecting trace collection. */
        unsigned int flags;
};

struct pebs_tracer {
        /* The common DS part: */
        struct ds_tracer ds;

        /* The trace including the DS configuration: */
        struct pebs_trace trace;

        /* Buffer overflow notification function: */
        pebs_ovfl_callback_t ovfl;
};

/*
 * Debug Store (DS) save area configuration (see Intel64 and IA32
 * Architectures Software Developer's Manual, section 18.5)
 *
 * The DS configuration consists of the following fields; different
 * architectures vary in the size of those fields.
 *
 * - double-word aligned base linear address of the BTS buffer
 * - write pointer into the BTS buffer
 * - end linear address of the BTS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into BTS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - double-word aligned base linear address of the PEBS buffer
 * - write pointer into the PEBS buffer
 * - end linear address of the PEBS buffer (one byte beyond the end of
 *   the buffer)
 * - interrupt pointer into PEBS buffer
 *   (interrupt occurs when write pointer passes interrupt pointer)
 * - value to which counter is reset following counter overflow
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 *
 * We compute the base address for the first 8 fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 * - an offset giving the start of the respective region
 *
 * This offset is further used to index various arrays holding
 * information for BTS and PEBS at the respective index.
 *
 * On later 32bit processors, we only access the lower 32bit of the
 * 64bit pointer fields. The upper halves will be zeroed out.
 */

enum ds_field {
        ds_buffer_base = 0,
        ds_index,
        ds_absolute_maximum,
        ds_interrupt_threshold,
};

enum ds_qualifier {
        ds_bts = 0,
        ds_pebs
};

static inline unsigned long
ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        return *(unsigned long *)base;
}

static inline void
ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
       unsigned long value)
{
        base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        (*(unsigned long *)base) = value;
}
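
/*
 * Worked example (editor's illustration; not part of the original
 * file): with 64bit fields (ds_cfg.sizeof_ptr_field == 8), the PEBS
 * write pointer is field ds_index (1) in region ds_pebs (1), i.e. at
 * byte offset 8 * (1 + 4 * 1) = 40 from the start of the DS area:
 *
 *	unsigned long pebs_index = ds_get(context->ds, ds_pebs, ds_index);
 *
 * The same call with ds_bts reads byte offset 8 * (1 + 4 * 0) = 8.
 */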


/*
 * Locking is done only for allocating BTS or PEBS resources.
 */
static DEFINE_SPINLOCK(ds_lock);

/*
 * We either support (system-wide) per-cpu or per-thread allocation.
 * We distinguish the two based on the task_struct pointer, where a
 * NULL pointer indicates per-cpu allocation for the current cpu.
 *
 * Allocations are use-counted. As soon as resources are allocated,
 * further allocations must be of the same type (per-cpu or
 * per-thread). We model this by counting allocations (i.e. the number
 * of tracers of a certain type) for one type negatively:
 *   =0  no tracers
 *   >0  number of per-thread tracers
 *   <0  number of per-cpu tracers
 *
 * The tracers count essentially gives the number of ds contexts for a
 * certain type of allocation.
 */
static atomic_t tracers = ATOMIC_INIT(0);

static inline int get_tracer(struct task_struct *task)
{
        int error;

        spin_lock_irq(&ds_lock);

        if (task) {
                error = -EPERM;
                if (atomic_read(&tracers) < 0)
                        goto out;
                atomic_inc(&tracers);
        } else {
                error = -EPERM;
                if (atomic_read(&tracers) > 0)
                        goto out;
                atomic_dec(&tracers);
        }

        error = 0;
out:
        spin_unlock_irq(&ds_lock);
        return error;
}

static inline void put_tracer(struct task_struct *task)
{
        if (task)
                atomic_dec(&tracers);
        else
                atomic_inc(&tracers);
}
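
/*
 * Illustration (editor's note; not part of the original file): two
 * per-thread requests drive the count to +2; a subsequent per-cpu
 * request then fails get_tracer() with -EPERM because the count is
 * positive. Symmetrically, per-cpu tracers drive the count negative
 * and block per-thread requests until put_tracer() returns the count
 * to zero.
 */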

/*
 * The DS context is either attached to a thread or to a cpu:
 * - in the former case, the thread_struct contains a pointer to the
 *   attached context.
 * - in the latter case, we use a static array of per-cpu context
 *   pointers.
 *
 * Contexts are use-counted. They are allocated on first access and
 * deallocated when the last user puts the context.
 */
struct ds_context {
        /* The DS configuration; goes into MSR_IA32_DS_AREA: */
        unsigned char ds[MAX_SIZEOF_DS];

        /* The owner of the BTS and PEBS configuration, respectively: */
        struct bts_tracer *bts_master;
        struct pebs_tracer *pebs_master;

        /* Use count: */
        unsigned long count;

        /* Pointer to the context pointer field: */
        struct ds_context **this;

        /* The traced task; NULL for cpu tracing: */
        struct task_struct *task;

        /* The traced cpu; only valid if task is NULL: */
        int cpu;
};

static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);


static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{
        struct ds_context **p_context =
                (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;

        /* Chances are small that we already have a context. */
        new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
        if (!new_context)
                return NULL;

        spin_lock_irq(&ds_lock);

        context = *p_context;
        if (likely(!context)) {
                context = new_context;

                context->this = p_context;
                context->task = task;
                context->cpu = cpu;
                context->count = 0;

                *p_context = context;
        }

        context->count++;

        spin_unlock_irq(&ds_lock);

        if (context != new_context)
                kfree(new_context);

        return context;
}

static void ds_put_context(struct ds_context *context)
{
        struct task_struct *task;
        unsigned long irq;

        if (!context)
                return;

        spin_lock_irqsave(&ds_lock, irq);

        if (--context->count) {
                spin_unlock_irqrestore(&ds_lock, irq);
                return;
        }

        *(context->this) = NULL;

        task = context->task;

        if (task)
                clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);

        /*
         * We leave the (now dangling) pointer to the DS configuration in
         * the DS_AREA msr. This is as good or as bad as replacing it with
         * NULL - the hardware would crash if we enabled tracing.
         *
         * This saves us some problems with having to write an msr on a
         * different cpu while preventing others from doing the same for the
         * next context for that same cpu.
         */

        spin_unlock_irqrestore(&ds_lock, irq);

        /* The context might still be in use for context switching. */
        if (task && (task != current))
                wait_task_context_switch(task);

        kfree(context);
}
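
/*
 * Design note with a usage sketch (editor's addition): ds_get_context()
 * allocates the new context *before* taking ds_lock, since kzalloc()
 * with GFP_KERNEL may sleep and must not run under a spinlock; the
 * unused allocation is simply freed if a context already existed.
 * Callers pair it with ds_put_context():
 *
 *	struct ds_context *ctx = ds_get_context(task, 0);
 *	if (!ctx)
 *		return -ENOMEM;
 *	...
 *	ds_put_context(ctx);
 */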

static void ds_install_ds_area(struct ds_context *context)
{
        unsigned long ds;

        ds = (unsigned long)context->ds;

        /*
         * There is a race between the bts master and the pebs master.
         *
         * The thread/cpu access is synchronized via get/put_cpu() for
         * task tracing and via wrmsr_on_cpu for cpu tracing.
         *
         * If bts and pebs are collected for the same task or same cpu,
         * the same configuration is written twice.
         */
        if (context->task) {
                get_cpu();
                if (context->task == current)
                        wrmsrl(MSR_IA32_DS_AREA, ds);
                set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
                put_cpu();
        } else
                wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
                             (u32)((u64)ds), (u32)((u64)ds >> 32));
}

/*
 * Call the tracer's callback on a buffer overflow.
 *
 * context: the ds context
 * qual: the buffer type
 */
static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
{
        switch (qual) {
        case ds_bts:
                if (context->bts_master &&
                    context->bts_master->ovfl)
                        context->bts_master->ovfl(context->bts_master);
                break;
        case ds_pebs:
                if (context->pebs_master &&
                    context->pebs_master->ovfl)
                        context->pebs_master->ovfl(context->pebs_master);
                break;
        }
}


/*
 * Write raw data into the BTS or PEBS buffer.
 *
 * The remainder of any partially written record is zeroed out.
 *
 * context: the DS context
 * qual: the buffer type
 * record: the data to write
 * size: the size of the data
 */
static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                    const void *record, size_t size)
{
        int bytes_written = 0;

        if (!record)
                return -EINVAL;

        while (size) {
                unsigned long base, index, end, write_end, int_th;
                unsigned long write_size, adj_write_size;

                /*
                 * Write as much as possible without producing an
                 * overflow interrupt.
                 *
                 * Interrupt_threshold must either be
                 * - bigger than absolute_maximum or
                 * - point to a record between buffer_base and absolute_maximum
                 *
                 * Index points to a valid record.
                 */
                base   = ds_get(context->ds, qual, ds_buffer_base);
                index  = ds_get(context->ds, qual, ds_index);
                end    = ds_get(context->ds, qual, ds_absolute_maximum);
                int_th = ds_get(context->ds, qual, ds_interrupt_threshold);

                write_end = min(end, int_th);

                /*
                 * If we are already beyond the interrupt threshold,
                 * we fill the entire buffer.
                 */
                if (write_end <= index)
                        write_end = end;

                if (write_end <= index)
                        break;

                write_size = min((unsigned long) size, write_end - index);
                memcpy((void *)index, record, write_size);

                record = (const char *)record + write_size;
                size -= write_size;
                bytes_written += write_size;

                adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
                adj_write_size *= ds_cfg.sizeof_rec[qual];

                /* Zero out trailing bytes. */
                memset((char *)index + write_size, 0,
                       adj_write_size - write_size);
                index += adj_write_size;

                if (index >= end)
                        index = base;
                ds_set(context->ds, qual, ds_index, index);

                if (index >= int_th)
                        ds_overflow(context, qual);
        }

        return bytes_written;
}
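
/*
 * Worked example (editor's illustration): with 24-byte BTS records and
 * index sitting on the last record slot, a 24-byte write advances index
 * to end, so it wraps back to base; if the new index has reached the
 * interrupt threshold, ds_overflow() notifies the buffer's master
 * tracer. Write sizes are rounded to whole records (adj_write_size) so
 * that index stays record-aligned.
 */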


/*
 * Branch Trace Store (BTS) uses the following format. Different
 * architectures vary in the size of those fields.
 * - source linear address
 * - destination linear address
 * - flags
 *
 * Later architectures use 64bit pointers throughout, whereas earlier
 * architectures use 32bit pointers in 32bit mode.
 *
 * We compute the base address for the fields based on:
 * - the field size stored in the DS configuration
 * - the relative field position
 *
 * In order to store additional information in the BTS buffer, we use
 * a special source address to indicate that the record requires
 * special interpretation.
 *
 * Netburst indicated via a bit in the flags field whether the branch
 * was predicted; this is ignored.
 *
 * We use two levels of abstraction:
 * - the raw data level defined here
 * - an arch-independent level defined in ds.h
 */

enum bts_field {
        bts_from,
        bts_to,
        bts_flags,

        bts_qual = bts_from,
        bts_clock = bts_to,
        bts_pid = bts_flags,

        bts_qual_mask = (bts_qual_max - 1),
        bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
};

static inline unsigned long bts_get(const char *base, unsigned long field)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        return *(unsigned long *)base;
}

static inline void bts_set(char *base, unsigned long field, unsigned long val)
{
        base += (ds_cfg.sizeof_ptr_field * field);
        (*(unsigned long *)base) = val;
}
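
/*
 * Worked example (editor's illustration): a raw record whose bts_qual
 * field carries the bts_escape marker holds an event rather than a
 * branch; its remaining fields are then reinterpreted as a timestamp
 * (bts_clock) and a pid (bts_pid). A from/to pair of 0/0 marks a
 * record as bts_invalid. See bts_read() below for the exact decoding.
 */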


/*
 * The raw BTS data is architecture dependent.
 *
 * For higher-level users, we give an arch-independent view.
 * - ds.h defines struct bts_struct
 * - bts_read translates one raw bts record into a bts_struct
 * - bts_write translates one bts_struct into the raw format and
 *   writes it into the top of the parameter tracer's buffer.
 *
 * return: bytes read/written on success; -Eerrno, otherwise
 */
static int
bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{
        if (!tracer)
                return -EINVAL;

        if (at < tracer->trace.ds.begin)
                return -EINVAL;

        if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
                return -EINVAL;

        memset(out, 0, sizeof(*out));
        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
                out->variant.event.clock = bts_get(at, bts_clock);
                out->variant.event.pid = bts_get(at, bts_pid);
        } else {
                out->qualifier = bts_branch;
                out->variant.lbr.from = bts_get(at, bts_from);
                out->variant.lbr.to = bts_get(at, bts_to);

                if (!out->variant.lbr.from && !out->variant.lbr.to)
                        out->qualifier = bts_invalid;
        }

        return ds_cfg.sizeof_rec[ds_bts];
}

static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
{
        unsigned char raw[MAX_SIZEOF_BTS];

        if (!tracer)
                return -EINVAL;

        if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
                return -EOVERFLOW;

        switch (in->qualifier) {
        case bts_invalid:
                bts_set(raw, bts_from, 0);
                bts_set(raw, bts_to, 0);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_branch:
                bts_set(raw, bts_from, in->variant.lbr.from);
                bts_set(raw, bts_to, in->variant.lbr.to);
                bts_set(raw, bts_flags, 0);
                break;
        case bts_task_arrives:
        case bts_task_departs:
                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
                bts_set(raw, bts_clock, in->variant.event.clock);
                bts_set(raw, bts_pid, in->variant.event.pid);
                break;
        default:
                return -EINVAL;
        }

        return ds_write(tracer->ds.context, ds_bts, raw,
                        ds_cfg.sizeof_rec[ds_bts]);
}
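
/*
 * Usage sketch (editor's addition; hypothetical caller): walk the
 * records collected so far through the arch-independent read hook.
 * Note that ds.top is the next write position, so after a buffer wrap
 * the records in [top, end) are older than those in [begin, top):
 *
 *	const struct bts_trace *trace = ds_read_bts(tracer);
 *	struct bts_struct bts;
 *	const void *at;
 *
 *	for (at = trace->ds.begin; at < trace->ds.top; at += trace->ds.size)
 *		if (trace->read(tracer, at, &bts) > 0 &&
 *		    bts.qualifier == bts_branch)
 *			handle_branch(bts.variant.lbr.from,
 *				      bts.variant.lbr.to);
 *
 * where handle_branch() is a hypothetical consumer.
 */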


static void ds_write_config(struct ds_context *context,
                            struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
        ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
        ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
        ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
}

static void ds_read_config(struct ds_context *context,
                           struct ds_trace *cfg, enum ds_qualifier qual)
{
        unsigned char *ds = context->ds;

        cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
        cfg->top = (void *)ds_get(ds, qual, ds_index);
        cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
        cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
}

static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
                             void *base, size_t size, size_t ith,
                             unsigned int flags) {
        unsigned long buffer, adj;

        /*
         * Adjust the buffer address and size to meet alignment
         * constraints:
         * - buffer is double-word aligned
         * - size is multiple of record size
         *
         * We checked the size at the very beginning; we have enough
         * space to do the adjustment.
         */
        buffer = (unsigned long)base;

        adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
        buffer += adj;
        size -= adj;

        trace->n = size / ds_cfg.sizeof_rec[qual];
        trace->size = ds_cfg.sizeof_rec[qual];

        size = (trace->n * trace->size);

        trace->begin = (void *)buffer;
        trace->top = trace->begin;
        trace->end = (void *)(buffer + size);
        /*
         * The value for 'no threshold' is -1, which will set the
         * threshold outside of the buffer, just like we want it.
         */
        ith *= ds_cfg.sizeof_rec[qual];
        trace->ith = (void *)(buffer + size - ith);

        trace->flags = flags;
}
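
/*
 * Worked example (editor's illustration): a 1024-byte buffer starting
 * at an address ending in ...5 is aligned up to ...8 (adj = 3),
 * leaving 1021 bytes; with 24-byte BTS records that gives n = 42
 * records and an effective size of 1008 bytes. An ith of (size_t)-1
 * ('no threshold') places trace->ith far outside the buffer.
 */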


static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
                      enum ds_qualifier qual, struct task_struct *task,
                      int cpu, void *base, size_t size, size_t th)
{
        struct ds_context *context;
        int error;
        size_t req_size;

        error = -EOPNOTSUPP;
        if (!ds_cfg.sizeof_rec[qual])
                goto out;

        error = -EINVAL;
        if (!base)
                goto out;

        req_size = ds_cfg.sizeof_rec[qual];
        /* We might need space for alignment adjustments. */
        if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
                req_size += DS_ALIGNMENT;

        error = -EINVAL;
        if (size < req_size)
                goto out;

        if (th != (size_t)-1) {
                th *= ds_cfg.sizeof_rec[qual];

                error = -EINVAL;
                if (size <= th)
                        goto out;
        }

        tracer->buffer = base;
        tracer->size = size;

        error = -ENOMEM;
        context = ds_get_context(task, cpu);
        if (!context)
                goto out;
        tracer->context = context;

        /*
         * Defer any tracer-specific initialization work for the context until
         * context ownership has been clarified.
         */

        error = 0;
out:
        return error;
}

static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
                                         void *base, size_t size,
                                         bts_ovfl_callback_t ovfl, size_t th,
                                         unsigned int flags)
{
        struct bts_tracer *tracer;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_bts, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;

        /* Claim the bts part of the tracing context we acquired above. */
        spin_lock_irq(&ds_lock);

        error = -EPERM;
        if (tracer->ds.context->bts_master)
                goto out_unlock;
        tracer->ds.context->bts_master = tracer;

        spin_unlock_irq(&ds_lock);

        /*
         * Now that we own the bts part of the context, let's complete the
         * initialization for that part.
         */
        ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
        ds_install_ds_area(tracer->ds.context);

        tracer->trace.read = bts_read;
        tracer->trace.write = bts_write;

        /* Start tracing. */
        ds_resume_bts(tracer);

        return tracer;

out_unlock:
        spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
out_tracer:
        kfree(tracer);
out_put_tracer:
        put_tracer(task);
out:
        return ERR_PTR(error);
}

struct bts_tracer *ds_request_bts_task(struct task_struct *task,
                                       void *base, size_t size,
                                       bts_ovfl_callback_t ovfl,
                                       size_t th, unsigned int flags)
{
        return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}

struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
                                      bts_ovfl_callback_t ovfl,
                                      size_t th, unsigned int flags)
{
        return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
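
/*
 * Usage sketch (editor's addition; hypothetical caller): trace the
 * current task's branches into a page-sized buffer, without overflow
 * callback (not yet implemented) and without interrupt threshold:
 *
 *	void *buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
 *	struct bts_tracer *tracer;
 *
 *	tracer = ds_request_bts_task(current, buf, PAGE_SIZE, NULL,
 *				     (size_t)-1, BTS_KERNEL | BTS_USER);
 *	if (IS_ERR(tracer))
 *		return PTR_ERR(tracer);
 *	...
 *	ds_release_bts(tracer);
 *	kfree(buf);
 *
 * Buffer allocation and accounting are the caller's job, as stated in
 * the file header.
 */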

static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
                                           void *base, size_t size,
                                           pebs_ovfl_callback_t ovfl, size_t th,
                                           unsigned int flags)
{
        struct pebs_tracer *tracer;
        int error;

        /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;

        error = get_tracer(task);
        if (error < 0)
                goto out;

        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
                goto out_put_tracer;
        tracer->ovfl = ovfl;

        /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
                           ds_pebs, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;

        /* Claim the pebs part of the tracing context we acquired above. */
        spin_lock_irq(&ds_lock);

        error = -EPERM;
        if (tracer->ds.context->pebs_master)
                goto out_unlock;
        tracer->ds.context->pebs_master = tracer;

        spin_unlock_irq(&ds_lock);

        /*
         * Now that we own the pebs part of the context, let's complete the
         * initialization for that part.
         */
        ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
        ds_install_ds_area(tracer->ds.context);

        /* Start tracing. */
        ds_resume_pebs(tracer);

        return tracer;

out_unlock:
        spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
out_tracer:
        kfree(tracer);
out_put_tracer:
        put_tracer(task);
out:
        return ERR_PTR(error);
}

struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
                                         void *base, size_t size,
                                         pebs_ovfl_callback_t ovfl,
                                         size_t th, unsigned int flags)
{
        return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
}

struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
                                        pebs_ovfl_callback_t ovfl,
                                        size_t th, unsigned int flags)
{
        return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}

static void ds_free_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;

        task = tracer->ds.context->task;

        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
        tracer->ds.context->bts_master = NULL;

        /* Make sure tracing stopped and the tracer is not in use. */
        if (task && (task != current))
                wait_task_context_switch(task);

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_release_bts(struct bts_tracer *tracer)
{
        might_sleep();

        if (!tracer)
                return;

        ds_suspend_bts(tracer);
        ds_free_bts(tracer);
}

int ds_release_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long irq;
        int error;

        if (!tracer)
                return 0;

        task = tracer->ds.context->task;

        local_irq_save(irq);

        error = -EPERM;
        if (!task &&
            (tracer->ds.context->cpu != smp_processor_id()))
                goto out;

        error = -EPERM;
        if (task && (task != current))
                goto out;

        ds_suspend_bts_noirq(tracer);
        ds_free_bts(tracer);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static void update_task_debugctlmsr(struct task_struct *task,
                                    unsigned long debugctlmsr)
{
        task->thread.debugctlmsr = debugctlmsr;

        get_cpu();
        if (task == current)
                update_debugctlmsr(debugctlmsr);
        put_cpu();
}

void ds_suspend_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr;
        int cpu;

        if (!tracer)
                return;

        tracer->flags = 0;

        task = tracer->ds.context->task;
        cpu = tracer->ds.context->cpu;

        WARN_ON(!task && irqs_disabled());

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr_on_cpu(cpu));
        debugctlmsr &= ~BTS_CONTROL;

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr, irq;
        int cpu, error = 0;

        if (!tracer)
                return 0;

        tracer->flags = 0;

        task = tracer->ds.context->task;
        cpu = tracer->ds.context->cpu;

        local_irq_save(irq);

        error = -EPERM;
        if (!task && (cpu != smp_processor_id()))
                goto out;

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr());
        debugctlmsr &= ~BTS_CONTROL;

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr(debugctlmsr);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
        unsigned long control;

        control = ds_cfg.ctl[dsf_bts];
        if (!(tracer->trace.ds.flags & BTS_KERNEL))
                control |= ds_cfg.ctl[dsf_bts_kernel];
        if (!(tracer->trace.ds.flags & BTS_USER))
                control |= ds_cfg.ctl[dsf_bts_user];

        return control;
}
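
/*
 * Design note (editor's addition): the dsf_bts_kernel and dsf_bts_user
 * masks are understood here as DEBUGCTL *suppress* bits, so each one is
 * OR'ed in when the corresponding BTS_KERNEL/BTS_USER flag was NOT
 * requested. E.g. flags == BTS_USER on the Core 2 configuration yields
 * (1 << 6) | (1 << 7) | (1 << 9): tracing enabled, kernel-mode branches
 * suppressed.
 */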

void ds_resume_bts(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr;
        int cpu;

        if (!tracer)
                return;

        tracer->flags = tracer->trace.ds.flags;

        task = tracer->ds.context->task;
        cpu = tracer->ds.context->cpu;

        WARN_ON(!task && irqs_disabled());

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr_on_cpu(cpu));
        debugctlmsr |= ds_bts_control(tracer);

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}

int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
        struct task_struct *task;
        unsigned long debugctlmsr, irq;
        int cpu, error = 0;

        if (!tracer)
                return 0;

        tracer->flags = tracer->trace.ds.flags;

        task = tracer->ds.context->task;
        cpu = tracer->ds.context->cpu;

        local_irq_save(irq);

        error = -EPERM;
        if (!task && (cpu != smp_processor_id()))
                goto out;

        debugctlmsr = (task ?
                       task->thread.debugctlmsr :
                       get_debugctlmsr());
        debugctlmsr |= ds_bts_control(tracer);

        if (task)
                update_task_debugctlmsr(task, debugctlmsr);
        else
                update_debugctlmsr(debugctlmsr);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

static void ds_free_pebs(struct pebs_tracer *tracer)
{
        struct task_struct *task;

        task = tracer->ds.context->task;

        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
        tracer->ds.context->pebs_master = NULL;

        ds_put_context(tracer->ds.context);
        put_tracer(task);

        kfree(tracer);
}

void ds_release_pebs(struct pebs_tracer *tracer)
{
        might_sleep();

        if (!tracer)
                return;

        ds_suspend_pebs(tracer);
        ds_free_pebs(tracer);
}

int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
        struct task_struct *task;
        unsigned long irq;
        int error;

        if (!tracer)
                return 0;

        task = tracer->ds.context->task;

        local_irq_save(irq);

        error = -EPERM;
        if (!task &&
            (tracer->ds.context->cpu != smp_processor_id()))
                goto out;

        error = -EPERM;
        if (task && (task != current))
                goto out;

        ds_suspend_pebs_noirq(tracer);
        ds_free_pebs(tracer);

        error = 0;
out:
        local_irq_restore(irq);
        return error;
}

void ds_suspend_pebs(struct pebs_tracer *tracer)
{

}

int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
        return 0;
}

void ds_resume_pebs(struct pebs_tracer *tracer)
{

}

int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
        return 0;
}

const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
        return &tracer->trace;
}

const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return NULL;

        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);

        tracer->trace.counters = ds_cfg.nr_counter_reset;
        memcpy(tracer->trace.counter_reset,
               tracer->ds.context->ds +
                        (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
               ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);

        return &tracer->trace;
}

int ds_reset_bts(struct bts_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_bts, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_reset_pebs(struct pebs_tracer *tracer)
{
        if (!tracer)
                return -EINVAL;

        tracer->trace.ds.top = tracer->trace.ds.begin;

        ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
               (unsigned long)tracer->trace.ds.top);

        return 0;
}

int ds_set_pebs_reset(struct pebs_tracer *tracer,
                      unsigned int counter, u64 value)
{
        if (!tracer)
                return -EINVAL;

        /* Counters are indexed 0..nr_counter_reset-1. */
        if (ds_cfg.nr_counter_reset <= counter)
                return -EINVAL;

        *(u64 *)(tracer->ds.context->ds +
                 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
                 (counter * PEBS_RESET_FIELD_SIZE)) = value;

        return 0;
}
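
/*
 * Worked example (editor's illustration): with 64bit fields, the
 * counter reset array starts at byte 8 * 8 = 64 of the DS area, so
 *
 *	ds_set_pebs_reset(tracer, 2, -1000LL);
 *
 * writes the reset value for counter 2 at byte offset 64 + 2 * 8 = 80;
 * on Core i7 (nr_counter_reset == 4), counters 0..3 are valid.
 */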

static const struct ds_configuration ds_cfg_netburst = {
        .name = "Netburst",
        .ctl[dsf_bts]		= (1 << 2) | (1 << 3),
        .ctl[dsf_bts_kernel]	= (1 << 5),
        .ctl[dsf_bts_user]	= (1 << 6),
        .nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_pentium_m = {
        .name = "Pentium M",
        .ctl[dsf_bts]		= (1 << 6) | (1 << 7),
        .nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_core2_atom = {
        .name = "Core 2/Atom",
        .ctl[dsf_bts]		= (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel]	= (1 << 9),
        .ctl[dsf_bts_user]	= (1 << 10),
        .nr_counter_reset	= 1,
};
static const struct ds_configuration ds_cfg_core_i7 = {
        .name = "Core i7",
        .ctl[dsf_bts]		= (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel]	= (1 << 9),
        .ctl[dsf_bts_user]	= (1 << 10),
        .nr_counter_reset	= 4,
};

static void
ds_configure(const struct ds_configuration *cfg,
             struct cpuinfo_x86 *cpu)
{
        unsigned long nr_pebs_fields = 0;

        printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);

#ifdef __i386__
        nr_pebs_fields = 10;
#else
        nr_pebs_fields = 18;
#endif

        /*
         * Starting with version 2, architectural performance
         * monitoring supports a format specifier.
         */
        if ((cpuid_eax(0xa) & 0xff) > 1) {
                unsigned long perf_capabilities, format;

                rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

                format = (perf_capabilities >> 8) & 0xf;

                switch (format) {
                case 0:
                        nr_pebs_fields = 18;
                        break;
                case 1:
                        nr_pebs_fields = 22;
                        break;
                default:
                        printk(KERN_INFO
                               "[ds] unknown PEBS format: %lu\n", format);
                        nr_pebs_fields = 0;
                        break;
                }
        }

        memset(&ds_cfg, 0, sizeof(ds_cfg));
        ds_cfg = *cfg;

        ds_cfg.sizeof_ptr_field =
                (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);

        ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
        ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;

        if (!cpu_has(cpu, X86_FEATURE_BTS)) {
                ds_cfg.sizeof_rec[ds_bts] = 0;
                printk(KERN_INFO "[ds] bts not available\n");
        }
        if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
                ds_cfg.sizeof_rec[ds_pebs] = 0;
                printk(KERN_INFO "[ds] pebs not available\n");
        }

        printk(KERN_INFO "[ds] sizes: address: %u bit, ",
               8 * ds_cfg.sizeof_ptr_field);
        printk("bts/pebs record: %u/%u bytes\n",
               ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);

        WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
}
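
/*
 * Worked example (editor's illustration): on a DTES64-capable CPU,
 * sizeof_ptr_field is 8, so a BTS record is 8 * 3 = 24 bytes and a
 * format-0 PEBS record is 8 * 18 = 144 bytes; without DTES64 on i386,
 * a BTS record shrinks to 4 * 3 = 12 bytes.
 */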

void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{
        /* Only configure the first cpu. Others are identical. */
        if (ds_cfg.name)
                return;

        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
                case 0x9:
                case 0xd: /* Pentium M */
                        ds_configure(&ds_cfg_pentium_m, c);
                        break;
                case 0xf:
                case 0x17: /* Core2 */
                case 0x1c: /* Atom */
                        ds_configure(&ds_cfg_core2_atom, c);
                        break;
                case 0x1a: /* Core i7 */
                        ds_configure(&ds_cfg_core_i7, c);
                        break;
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        case 0xf:
                switch (c->x86_model) {
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
                        ds_configure(&ds_cfg_netburst, c);
                        break;
                default:
                        /* Sorry, don't know about them. */
                        break;
                }
                break;
        default:
                /* Sorry, don't know about them. */
                break;
        }
}

static inline void ds_take_timestamp(struct ds_context *context,
                                     enum bts_qualifier qualifier,
                                     struct task_struct *task)
{
        struct bts_tracer *tracer = context->bts_master;
        struct bts_struct ts;

        /* Prevent compilers from reading the tracer pointer twice. */
        barrier();

        if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
                return;

        memset(&ts, 0, sizeof(ts));
        ts.qualifier = qualifier;
        ts.variant.event.clock = trace_clock_global();
        ts.variant.event.pid = task->pid;

        bts_write(tracer, &ts);
}

/*
 * Change the DS configuration from tracing prev to tracing next.
 */
void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{
        struct ds_context *prev_ctx = prev->thread.ds_ctx;
        struct ds_context *next_ctx = next->thread.ds_ctx;
        unsigned long debugctlmsr = next->thread.debugctlmsr;

        /* Make sure all data is read before we start. */
        barrier();

        if (prev_ctx) {
                update_debugctlmsr(0);

                ds_take_timestamp(prev_ctx, bts_task_departs, prev);
        }

        if (next_ctx) {
                ds_take_timestamp(next_ctx, bts_task_arrives, next);

                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
        }

        update_debugctlmsr(debugctlmsr);
}

static __init int ds_selftest(void)
{
        if (ds_cfg.sizeof_rec[ds_bts]) {
                int error;

                error = ds_selftest_bts();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling bts.\n");
                        ds_cfg.sizeof_rec[ds_bts] = 0;
                }
        }

        if (ds_cfg.sizeof_rec[ds_pebs]) {
                int error;

                error = ds_selftest_pebs();
                if (error) {
                        WARN(1, "[ds] selftest failed. disabling pebs.\n");
                        ds_cfg.sizeof_rec[ds_pebs] = 0;
                }
        }

        return 0;
}
device_initcall(ds_selftest);
