diff options
Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r-- | arch/x86/kernel/ds.c | 953 |
1 files changed, 676 insertions, 277 deletions
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..5b32b6d062b4 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -2,26 +2,48 @@ | |||
2 | * Debug Store support | 2 | * Debug Store support |
3 | * | 3 | * |
4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
10 | * - buffer memory allocation (optional) | ||
11 | * - buffer overflow handling | ||
12 | * - buffer access | ||
10 | * | 13 | * |
14 | * It assumes: | ||
15 | * - get_task_struct on all parameter tasks | ||
16 | * - current is allowed to trace parameter tasks | ||
11 | * | 17 | * |
12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
13 | * trace store, BTS) is supported. | ||
14 | * | 18 | * |
15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
18 | */ | 21 | */ |
19 | 22 | ||
23 | |||
24 | #ifdef CONFIG_X86_DS | ||
25 | |||
20 | #include <asm/ds.h> | 26 | #include <asm/ds.h> |
21 | 27 | ||
22 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
23 | #include <linux/string.h> | 29 | #include <linux/string.h> |
24 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/sched.h> | ||
32 | |||
33 | |||
34 | /* | ||
35 | * The configuration for a particular DS hardware implementation. | ||
36 | */ | ||
37 | struct ds_configuration { | ||
38 | /* the size of the DS structure in bytes */ | ||
39 | unsigned char sizeof_ds; | ||
40 | /* the size of one pointer-typed field in the DS structure in bytes; | ||
41 | this covers the first 8 fields related to buffer management. */ | ||
42 | unsigned char sizeof_field; | ||
43 | /* the size of a BTS/PEBS record in bytes */ | ||
44 | unsigned char sizeof_rec[2]; | ||
45 | }; | ||
46 | static struct ds_configuration ds_cfg; | ||
25 | 47 | ||
26 | 48 | ||
27 | /* | 49 | /* |
@@ -44,378 +66,747 @@ | |||
44 | * (interrupt occurs when write pointer passes interrupt pointer) | 66 | * (interrupt occurs when write pointer passes interrupt pointer) |
45 | * - value to which counter is reset following counter overflow | 67 | * - value to which counter is reset following counter overflow |
46 | * | 68 | * |
47 | * On later architectures, the last branch recording hardware uses | 69 | * Later architectures use 64bit pointers throughout, whereas earlier |
48 | * 64bit pointers even in 32bit mode. | 70 | * architectures use 32bit pointers in 32bit mode. |
49 | * | ||
50 | * | ||
51 | * Branch Trace Store (BTS) records store information about control | ||
52 | * flow changes. They at least provide the following information: | ||
53 | * - source linear address | ||
54 | * - destination linear address | ||
55 | * | 71 | * |
56 | * Netburst supported a predicated bit that had been dropped in later | ||
57 | * architectures. We do not support it. | ||
58 | * | 72 | * |
73 | * We compute the base address for the first 8 fields based on: | ||
74 | * - the field size stored in the DS configuration | ||
75 | * - the relative field position | ||
76 | * - an offset giving the start of the respective region | ||
59 | * | 77 | * |
60 | * In order to abstract from the actual DS and BTS layout, we describe | 78 | * This offset is further used to index various arrays holding |
61 | * the access to the relevant fields. | 79 | * information for BTS and PEBS at the respective index. |
62 | * Thanks to Andi Kleen for proposing this design. | ||
63 | * | 80 | * |
64 | * The implementation, however, is not as general as it might seem. In | 81 | * On later 32bit processors, we only access the lower 32bit of the |
65 | * order to stay somewhat simple and efficient, we assume an | 82 | * 64bit pointer fields. The upper halves will be zeroed out. |
66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
67 | * field to be at least as big as that type. | ||
68 | */ | 83 | */ |
69 | 84 | ||
70 | /* | 85 | enum ds_field { |
71 | * A special from_ip address to indicate that the BTS record is an | 86 | ds_buffer_base = 0, |
72 | * info record that needs to be interpreted or skipped. | 87 | ds_index, |
73 | */ | 88 | ds_absolute_maximum, |
74 | #define BTS_ESCAPE_ADDRESS (-1) | 89 | ds_interrupt_threshold, |
90 | }; | ||
75 | 91 | ||
76 | /* | 92 | enum ds_qualifier { |
77 | * A field access descriptor | 93 | ds_bts = 0, |
78 | */ | 94 | ds_pebs |
79 | struct access_desc { | ||
80 | unsigned char offset; | ||
81 | unsigned char size; | ||
82 | }; | 95 | }; |
83 | 96 | ||
97 | static inline unsigned long ds_get(const unsigned char *base, | ||
98 | enum ds_qualifier qual, enum ds_field field) | ||
99 | { | ||
100 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
101 | return *(unsigned long *)base; | ||
102 | } | ||
103 | |||
104 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | ||
105 | enum ds_field field, unsigned long value) | ||
106 | { | ||
107 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
108 | (*(unsigned long *)base) = value; | ||
109 | } | ||
110 | |||
111 | |||
84 | /* | 112 | /* |
85 | * The configuration for a particular DS/BTS hardware implementation. | 113 | * Locking is done only for allocating BTS or PEBS resources and for |
114 | * guarding context and buffer memory allocation. | ||
115 | * | ||
116 | * Most functions require the current task to own the ds context part | ||
117 | * they are going to access. All the locking is done when validating | ||
118 | * access to the context. | ||
86 | */ | 119 | */ |
87 | struct ds_configuration { | 120 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); |
88 | /* the DS configuration */ | ||
89 | unsigned char sizeof_ds; | ||
90 | struct access_desc bts_buffer_base; | ||
91 | struct access_desc bts_index; | ||
92 | struct access_desc bts_absolute_maximum; | ||
93 | struct access_desc bts_interrupt_threshold; | ||
94 | /* the BTS configuration */ | ||
95 | unsigned char sizeof_bts; | ||
96 | struct access_desc from_ip; | ||
97 | struct access_desc to_ip; | ||
98 | /* BTS variants used to store additional information like | ||
99 | timestamps */ | ||
100 | struct access_desc info_type; | ||
101 | struct access_desc info_data; | ||
102 | unsigned long debugctl_mask; | ||
103 | }; | ||
104 | 121 | ||
105 | /* | 122 | /* |
106 | * The global configuration used by the below accessor functions | 123 | * Validate that the current task is allowed to access the BTS/PEBS |
124 | * buffer of the parameter task. | ||
125 | * | ||
126 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
107 | */ | 127 | */ |
108 | static struct ds_configuration ds_cfg; | 128 | static inline int ds_validate_access(struct ds_context *context, |
129 | enum ds_qualifier qual) | ||
130 | { | ||
131 | if (!context) | ||
132 | return -EPERM; | ||
133 | |||
134 | if (context->owner[qual] == current) | ||
135 | return 0; | ||
136 | |||
137 | return -EPERM; | ||
138 | } | ||
139 | |||
109 | 140 | ||
110 | /* | 141 | /* |
111 | * Accessor functions for some DS and BTS fields using the above | 142 | * We either support (system-wide) per-cpu or per-thread allocation. |
112 | * global ptrace_bts_cfg. | 143 | * We distinguish the two based on the task_struct pointer, where a |
144 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
145 | * | ||
146 | * Allocations are use-counted. As soon as resources are allocated, | ||
147 | * further allocations must be of the same type (per-cpu or | ||
148 | * per-thread). We model this by counting allocations (i.e. the number | ||
149 | * of tracers of a certain type) for one type negatively: | ||
150 | * =0 no tracers | ||
151 | * >0 number of per-thread tracers | ||
152 | * <0 number of per-cpu tracers | ||
153 | * | ||
154 | * The below functions to get and put tracers and to check the | ||
155 | * allocation type require the ds_lock to be held by the caller. | ||
156 | * | ||
157 | * The tracers count essentially gives the number of ds contexts for a certain | ||
158 | * type of allocation. | ||
113 | */ | 159 | */ |
114 | static inline unsigned long get_bts_buffer_base(char *base) | 160 | static long tracers; |
161 | |||
162 | static inline void get_tracer(struct task_struct *task) | ||
115 | { | 163 | { |
116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | 164 | tracers += (task ? 1 : -1); |
117 | } | 165 | } |
118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | 166 | |
167 | static inline void put_tracer(struct task_struct *task) | ||
119 | { | 168 | { |
120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | 169 | tracers -= (task ? 1 : -1); |
121 | } | 170 | } |
122 | static inline unsigned long get_bts_index(char *base) | 171 | |
172 | static inline int check_tracer(struct task_struct *task) | ||
123 | { | 173 | { |
124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | 174 | return (task ? (tracers >= 0) : (tracers <= 0)); |
125 | } | 175 | } |
126 | static inline void set_bts_index(char *base, unsigned long value) | 176 | |
177 | |||
178 | /* | ||
179 | * The DS context is either attached to a thread or to a cpu: | ||
180 | * - in the former case, the thread_struct contains a pointer to the | ||
181 | * attached context. | ||
182 | * - in the latter case, we use a static array of per-cpu context | ||
183 | * pointers. | ||
184 | * | ||
185 | * Contexts are use-counted. They are allocated on first access and | ||
186 | * deallocated when the last user puts the context. | ||
187 | * | ||
188 | * We distinguish between an allocating and a non-allocating get of a | ||
189 | * context: | ||
190 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
191 | * requires the caller to hold the global ds_lock. | ||
192 | * - the non-allocating get is used for all other cases. A | ||
193 | * non-existing context indicates an error. It acquires and releases | ||
194 | * the ds_lock itself for obtaining the context. | ||
195 | * | ||
196 | * A context and its DS configuration are allocated and deallocated | ||
197 | * together. A context always has a DS configuration of the | ||
198 | * appropriate size. | ||
199 | */ | ||
200 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
201 | |||
202 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
203 | |||
204 | /* | ||
205 | * Returns the pointer to the parameter task's context or to the | ||
206 | * system-wide context, if task is NULL. | ||
207 | * | ||
208 | * Increases the use count of the returned context, if not NULL. | ||
209 | */ | ||
210 | static inline struct ds_context *ds_get_context(struct task_struct *task) | ||
127 | { | 211 | { |
128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | 212 | struct ds_context *context; |
213 | |||
214 | spin_lock(&ds_lock); | ||
215 | |||
216 | context = (task ? task->thread.ds_ctx : this_system_context); | ||
217 | if (context) | ||
218 | context->count++; | ||
219 | |||
220 | spin_unlock(&ds_lock); | ||
221 | |||
222 | return context; | ||
129 | } | 223 | } |
130 | static inline unsigned long get_bts_absolute_maximum(char *base) | 224 | |
225 | /* | ||
226 | * Same as ds_get_context, but allocates the context and its DS | ||
227 | * structure, if necessary; returns NULL, if out of memory. | ||
228 | * | ||
229 | * pre: requires ds_lock to be held | ||
230 | */ | ||
231 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
131 | { | 232 | { |
132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | 233 | struct ds_context **p_context = |
234 | (task ? &task->thread.ds_ctx : &this_system_context); | ||
235 | struct ds_context *context = *p_context; | ||
236 | |||
237 | if (!context) { | ||
238 | context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
239 | |||
240 | if (!context) | ||
241 | return 0; | ||
242 | |||
243 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
244 | if (!context->ds) { | ||
245 | kfree(context); | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | *p_context = context; | ||
250 | |||
251 | context->this = p_context; | ||
252 | context->task = task; | ||
253 | |||
254 | if (task) | ||
255 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
256 | |||
257 | if (!task || (task == current)) | ||
258 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | ||
259 | |||
260 | get_tracer(task); | ||
261 | } | ||
262 | |||
263 | context->count++; | ||
264 | |||
265 | return context; | ||
133 | } | 266 | } |
134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | 267 | |
268 | /* | ||
269 | * Decreases the use count of the parameter context, if not NULL. | ||
270 | * Deallocates the context, if the use count reaches zero. | ||
271 | */ | ||
272 | static inline void ds_put_context(struct ds_context *context) | ||
135 | { | 273 | { |
136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | 274 | if (!context) |
275 | return; | ||
276 | |||
277 | spin_lock(&ds_lock); | ||
278 | |||
279 | if (--context->count) | ||
280 | goto out; | ||
281 | |||
282 | *(context->this) = 0; | ||
283 | |||
284 | if (context->task) | ||
285 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
286 | |||
287 | if (!context->task || (context->task == current)) | ||
288 | wrmsrl(MSR_IA32_DS_AREA, 0); | ||
289 | |||
290 | put_tracer(context->task); | ||
291 | |||
292 | /* free any leftover buffers from tracers that did not | ||
293 | * deallocate them properly. */ | ||
294 | kfree(context->buffer[ds_bts]); | ||
295 | kfree(context->buffer[ds_pebs]); | ||
296 | kfree(context->ds); | ||
297 | kfree(context); | ||
298 | out: | ||
299 | spin_unlock(&ds_lock); | ||
137 | } | 300 | } |
138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | 301 | |
302 | |||
303 | /* | ||
304 | * Handle a buffer overflow | ||
305 | * | ||
306 | * task: the task whose buffers are overflowing; | ||
307 | * NULL for a buffer overflow on the current cpu | ||
308 | * context: the ds context | ||
309 | * qual: the buffer type | ||
310 | */ | ||
311 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | ||
312 | enum ds_qualifier qual) | ||
139 | { | 313 | { |
140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | 314 | if (!context) |
315 | return; | ||
316 | |||
317 | if (context->callback[qual]) | ||
318 | (*context->callback[qual])(task); | ||
319 | |||
320 | /* todo: do some more overflow handling */ | ||
141 | } | 321 | } |
142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | 322 | |
323 | |||
324 | /* | ||
325 | * Allocate a non-pageable buffer of the parameter size. | ||
326 | * Checks the memory and the locked memory rlimit. | ||
327 | * | ||
328 | * Returns the buffer, if successful; | ||
329 | * NULL, if out of memory or rlimit exceeded. | ||
330 | * | ||
331 | * size: the requested buffer size in bytes | ||
332 | * pages (out): if not NULL, contains the number of pages reserved | ||
333 | */ | ||
334 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | ||
143 | { | 335 | { |
144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | 336 | unsigned long rlim, vm, pgsz; |
337 | void *buffer; | ||
338 | |||
339 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
340 | |||
341 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
342 | vm = current->mm->total_vm + pgsz; | ||
343 | if (rlim < vm) | ||
344 | return 0; | ||
345 | |||
346 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
347 | vm = current->mm->locked_vm + pgsz; | ||
348 | if (rlim < vm) | ||
349 | return 0; | ||
350 | |||
351 | buffer = kzalloc(size, GFP_KERNEL); | ||
352 | if (!buffer) | ||
353 | return 0; | ||
354 | |||
355 | current->mm->total_vm += pgsz; | ||
356 | current->mm->locked_vm += pgsz; | ||
357 | |||
358 | if (pages) | ||
359 | *pages = pgsz; | ||
360 | |||
361 | return buffer; | ||
145 | } | 362 | } |
146 | static inline unsigned long get_from_ip(char *base) | 363 | |
364 | static int ds_request(struct task_struct *task, void *base, size_t size, | ||
365 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
147 | { | 366 | { |
148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | 367 | struct ds_context *context; |
368 | unsigned long buffer, adj; | ||
369 | const unsigned long alignment = (1 << 3); | ||
370 | int error = 0; | ||
371 | |||
372 | if (!ds_cfg.sizeof_ds) | ||
373 | return -EOPNOTSUPP; | ||
374 | |||
375 | /* we require some space to do alignment adjustments below */ | ||
376 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | ||
377 | return -EINVAL; | ||
378 | |||
379 | /* buffer overflow notification is not yet implemented */ | ||
380 | if (ovfl) | ||
381 | return -EOPNOTSUPP; | ||
382 | |||
383 | |||
384 | spin_lock(&ds_lock); | ||
385 | |||
386 | if (!check_tracer(task)) | ||
387 | return -EPERM; | ||
388 | |||
389 | error = -ENOMEM; | ||
390 | context = ds_alloc_context(task); | ||
391 | if (!context) | ||
392 | goto out_unlock; | ||
393 | |||
394 | error = -EALREADY; | ||
395 | if (context->owner[qual] == current) | ||
396 | goto out_unlock; | ||
397 | error = -EPERM; | ||
398 | if (context->owner[qual] != 0) | ||
399 | goto out_unlock; | ||
400 | context->owner[qual] = current; | ||
401 | |||
402 | spin_unlock(&ds_lock); | ||
403 | |||
404 | |||
405 | error = -ENOMEM; | ||
406 | if (!base) { | ||
407 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
408 | if (!base) | ||
409 | goto out_release; | ||
410 | |||
411 | context->buffer[qual] = base; | ||
412 | } | ||
413 | error = 0; | ||
414 | |||
415 | context->callback[qual] = ovfl; | ||
416 | |||
417 | /* adjust the buffer address and size to meet alignment | ||
418 | * constraints: | ||
419 | * - buffer is double-word aligned | ||
420 | * - size is multiple of record size | ||
421 | * | ||
422 | * We checked the size at the very beginning; we have enough | ||
423 | * space to do the adjustment. | ||
424 | */ | ||
425 | buffer = (unsigned long)base; | ||
426 | |||
427 | adj = ALIGN(buffer, alignment) - buffer; | ||
428 | buffer += adj; | ||
429 | size -= adj; | ||
430 | |||
431 | size /= ds_cfg.sizeof_rec[qual]; | ||
432 | size *= ds_cfg.sizeof_rec[qual]; | ||
433 | |||
434 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
435 | ds_set(context->ds, qual, ds_index, buffer); | ||
436 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
437 | |||
438 | if (ovfl) { | ||
439 | /* todo: select a suitable interrupt threshold */ | ||
440 | } else | ||
441 | ds_set(context->ds, qual, | ||
442 | ds_interrupt_threshold, buffer + size + 1); | ||
443 | |||
444 | /* we keep the context until ds_release */ | ||
445 | return error; | ||
446 | |||
447 | out_release: | ||
448 | context->owner[qual] = 0; | ||
449 | ds_put_context(context); | ||
450 | return error; | ||
451 | |||
452 | out_unlock: | ||
453 | spin_unlock(&ds_lock); | ||
454 | ds_put_context(context); | ||
455 | return error; | ||
149 | } | 456 | } |
150 | static inline void set_from_ip(char *base, unsigned long value) | 457 | |
458 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
459 | ds_ovfl_callback_t ovfl) | ||
151 | { | 460 | { |
152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | 461 | return ds_request(task, base, size, ovfl, ds_bts); |
153 | } | 462 | } |
154 | static inline unsigned long get_to_ip(char *base) | 463 | |
464 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
465 | ds_ovfl_callback_t ovfl) | ||
155 | { | 466 | { |
156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | 467 | return ds_request(task, base, size, ovfl, ds_pebs); |
157 | } | 468 | } |
158 | static inline void set_to_ip(char *base, unsigned long value) | 469 | |
470 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
159 | { | 471 | { |
160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | 472 | struct ds_context *context; |
473 | int error; | ||
474 | |||
475 | context = ds_get_context(task); | ||
476 | error = ds_validate_access(context, qual); | ||
477 | if (error < 0) | ||
478 | goto out; | ||
479 | |||
480 | kfree(context->buffer[qual]); | ||
481 | context->buffer[qual] = 0; | ||
482 | |||
483 | current->mm->total_vm -= context->pages[qual]; | ||
484 | current->mm->locked_vm -= context->pages[qual]; | ||
485 | context->pages[qual] = 0; | ||
486 | context->owner[qual] = 0; | ||
487 | |||
488 | /* | ||
489 | * we put the context twice: | ||
490 | * once for the ds_get_context | ||
491 | * once for the corresponding ds_request | ||
492 | */ | ||
493 | ds_put_context(context); | ||
494 | out: | ||
495 | ds_put_context(context); | ||
496 | return error; | ||
161 | } | 497 | } |
162 | static inline unsigned char get_info_type(char *base) | 498 | |
499 | int ds_release_bts(struct task_struct *task) | ||
163 | { | 500 | { |
164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | 501 | return ds_release(task, ds_bts); |
165 | } | 502 | } |
166 | static inline void set_info_type(char *base, unsigned char value) | 503 | |
504 | int ds_release_pebs(struct task_struct *task) | ||
167 | { | 505 | { |
168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | 506 | return ds_release(task, ds_pebs); |
169 | } | 507 | } |
170 | static inline unsigned long get_info_data(char *base) | 508 | |
509 | static int ds_get_index(struct task_struct *task, size_t *pos, | ||
510 | enum ds_qualifier qual) | ||
171 | { | 511 | { |
172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | 512 | struct ds_context *context; |
513 | unsigned long base, index; | ||
514 | int error; | ||
515 | |||
516 | context = ds_get_context(task); | ||
517 | error = ds_validate_access(context, qual); | ||
518 | if (error < 0) | ||
519 | goto out; | ||
520 | |||
521 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
522 | index = ds_get(context->ds, qual, ds_index); | ||
523 | |||
524 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | ||
525 | if (pos) | ||
526 | *pos = error; | ||
527 | out: | ||
528 | ds_put_context(context); | ||
529 | return error; | ||
173 | } | 530 | } |
174 | static inline void set_info_data(char *base, unsigned long value) | 531 | |
532 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | ||
175 | { | 533 | { |
176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | 534 | return ds_get_index(task, pos, ds_bts); |
177 | } | 535 | } |
178 | 536 | ||
537 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
538 | { | ||
539 | return ds_get_index(task, pos, ds_pebs); | ||
540 | } | ||
179 | 541 | ||
180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | 542 | static int ds_get_end(struct task_struct *task, size_t *pos, |
543 | enum ds_qualifier qual) | ||
181 | { | 544 | { |
182 | size_t bts_size_in_records; | 545 | struct ds_context *context; |
183 | unsigned long bts; | 546 | unsigned long base, end; |
184 | void *ds; | 547 | int error; |
548 | |||
549 | context = ds_get_context(task); | ||
550 | error = ds_validate_access(context, qual); | ||
551 | if (error < 0) | ||
552 | goto out; | ||
553 | |||
554 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
555 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
556 | |||
557 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | ||
558 | if (pos) | ||
559 | *pos = error; | ||
560 | out: | ||
561 | ds_put_context(context); | ||
562 | return error; | ||
563 | } | ||
185 | 564 | ||
186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 565 | int ds_get_bts_end(struct task_struct *task, size_t *pos) |
187 | return -EOPNOTSUPP; | 566 | { |
567 | return ds_get_end(task, pos, ds_bts); | ||
568 | } | ||
188 | 569 | ||
189 | if (bts_size_in_bytes < 0) | 570 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) |
190 | return -EINVAL; | 571 | { |
572 | return ds_get_end(task, pos, ds_pebs); | ||
573 | } | ||
191 | 574 | ||
192 | bts_size_in_records = | 575 | static int ds_access(struct task_struct *task, size_t index, |
193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | 576 | const void **record, enum ds_qualifier qual) |
194 | bts_size_in_bytes = | 577 | { |
195 | bts_size_in_records * ds_cfg.sizeof_bts; | 578 | struct ds_context *context; |
579 | unsigned long base, idx; | ||
580 | int error; | ||
196 | 581 | ||
197 | if (bts_size_in_bytes <= 0) | 582 | if (!record) |
198 | return -EINVAL; | 583 | return -EINVAL; |
199 | 584 | ||
200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | 585 | context = ds_get_context(task); |
586 | error = ds_validate_access(context, qual); | ||
587 | if (error < 0) | ||
588 | goto out; | ||
201 | 589 | ||
202 | if (!bts) | 590 | base = ds_get(context->ds, qual, ds_buffer_base); |
203 | return -ENOMEM; | 591 | idx = base + (index * ds_cfg.sizeof_rec[qual]); |
204 | 592 | ||
205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 593 | error = -EINVAL; |
594 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
595 | goto out; | ||
206 | 596 | ||
207 | if (!ds) { | 597 | *record = (const void *)idx; |
208 | kfree((void *)bts); | 598 | error = ds_cfg.sizeof_rec[qual]; |
209 | return -ENOMEM; | 599 | out: |
210 | } | 600 | ds_put_context(context); |
211 | 601 | return error; | |
212 | set_bts_buffer_base(ds, bts); | ||
213 | set_bts_index(ds, bts); | ||
214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
216 | |||
217 | *dsp = ds; | ||
218 | return 0; | ||
219 | } | 602 | } |
220 | 603 | ||
221 | int ds_free(void **dsp) | 604 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) |
222 | { | 605 | { |
223 | if (*dsp) { | 606 | return ds_access(task, index, record, ds_bts); |
224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
225 | kfree(*dsp); | ||
226 | *dsp = NULL; | ||
227 | } | ||
228 | return 0; | ||
229 | } | 607 | } |
230 | 608 | ||
231 | int ds_get_bts_size(void *ds) | 609 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) |
232 | { | 610 | { |
233 | int size_in_bytes; | 611 | return ds_access(task, index, record, ds_pebs); |
234 | |||
235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
236 | return -EOPNOTSUPP; | ||
237 | |||
238 | if (!ds) | ||
239 | return 0; | ||
240 | |||
241 | size_in_bytes = | ||
242 | get_bts_absolute_maximum(ds) - | ||
243 | get_bts_buffer_base(ds); | ||
244 | return size_in_bytes; | ||
245 | } | 612 | } |
246 | 613 | ||
247 | int ds_get_bts_end(void *ds) | 614 | static int ds_write(struct task_struct *task, const void *record, size_t size, |
615 | enum ds_qualifier qual, int force) | ||
248 | { | 616 | { |
249 | int size_in_bytes = ds_get_bts_size(ds); | 617 | struct ds_context *context; |
250 | 618 | int error; | |
251 | if (size_in_bytes <= 0) | ||
252 | return size_in_bytes; | ||
253 | 619 | ||
254 | return size_in_bytes / ds_cfg.sizeof_bts; | 620 | if (!record) |
255 | } | 621 | return -EINVAL; |
256 | 622 | ||
257 | int ds_get_bts_index(void *ds) | 623 | error = -EPERM; |
258 | { | 624 | context = ds_get_context(task); |
259 | int index_offset_in_bytes; | 625 | if (!context) |
626 | goto out; | ||
260 | 627 | ||
261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 628 | if (!force) { |
262 | return -EOPNOTSUPP; | 629 | error = ds_validate_access(context, qual); |
630 | if (error < 0) | ||
631 | goto out; | ||
632 | } | ||
263 | 633 | ||
264 | index_offset_in_bytes = | 634 | error = 0; |
265 | get_bts_index(ds) - | 635 | while (size) { |
266 | get_bts_buffer_base(ds); | 636 | unsigned long base, index, end, write_end, int_th; |
637 | unsigned long write_size, adj_write_size; | ||
638 | |||
639 | /* | ||
640 | * write as much as possible without producing an | ||
641 | * overflow interrupt. | ||
642 | * | ||
643 | * interrupt_threshold must either be | ||
644 | * - bigger than absolute_maximum or | ||
645 | * - point to a record between buffer_base and absolute_maximum | ||
646 | * | ||
647 | * index points to a valid record. | ||
648 | */ | ||
649 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
650 | index = ds_get(context->ds, qual, ds_index); | ||
651 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
652 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
653 | |||
654 | write_end = min(end, int_th); | ||
655 | |||
656 | /* if we are already beyond the interrupt threshold, | ||
657 | * we fill the entire buffer */ | ||
658 | if (write_end <= index) | ||
659 | write_end = end; | ||
660 | |||
661 | if (write_end <= index) | ||
662 | goto out; | ||
663 | |||
664 | write_size = min((unsigned long) size, write_end - index); | ||
665 | memcpy((void *)index, record, write_size); | ||
666 | |||
667 | record = (const char *)record + write_size; | ||
668 | size -= write_size; | ||
669 | error += write_size; | ||
670 | |||
671 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
672 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
673 | |||
674 | /* zero out trailing bytes */ | ||
675 | memset((char *)index + write_size, 0, | ||
676 | adj_write_size - write_size); | ||
677 | index += adj_write_size; | ||
678 | |||
679 | if (index >= end) | ||
680 | index = base; | ||
681 | ds_set(context->ds, qual, ds_index, index); | ||
682 | |||
683 | if (index >= int_th) | ||
684 | ds_overflow(task, context, qual); | ||
685 | } | ||
267 | 686 | ||
268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | 687 | out: |
688 | ds_put_context(context); | ||
689 | return error; | ||
269 | } | 690 | } |
270 | 691 | ||
271 | int ds_set_overflow(void *ds, int method) | 692 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) |
272 | { | 693 | { |
273 | switch (method) { | 694 | return ds_write(task, record, size, ds_bts, /* force = */ 0); |
274 | case DS_O_SIGNAL: | ||
275 | return -EOPNOTSUPP; | ||
276 | case DS_O_WRAP: | ||
277 | return 0; | ||
278 | default: | ||
279 | return -EINVAL; | ||
280 | } | ||
281 | } | 695 | } |
282 | 696 | ||
283 | int ds_get_overflow(void *ds) | 697 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) |
284 | { | 698 | { |
285 | return DS_O_WRAP; | 699 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); |
286 | } | 700 | } |
287 | 701 | ||
288 | int ds_clear(void *ds) | 702 | int ds_unchecked_write_bts(struct task_struct *task, |
703 | const void *record, size_t size) | ||
289 | { | 704 | { |
290 | int bts_size = ds_get_bts_size(ds); | 705 | return ds_write(task, record, size, ds_bts, /* force = */ 1); |
291 | unsigned long bts_base; | ||
292 | |||
293 | if (bts_size <= 0) | ||
294 | return bts_size; | ||
295 | |||
296 | bts_base = get_bts_buffer_base(ds); | ||
297 | memset((void *)bts_base, 0, bts_size); | ||
298 | |||
299 | set_bts_index(ds, bts_base); | ||
300 | return 0; | ||
301 | } | 706 | } |
302 | 707 | ||
303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | 708 | int ds_unchecked_write_pebs(struct task_struct *task, |
709 | const void *record, size_t size) | ||
304 | { | 710 | { |
305 | void *bts; | 711 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); |
712 | } | ||
306 | 713 | ||
307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 714 | static int ds_reset_or_clear(struct task_struct *task, |
308 | return -EOPNOTSUPP; | 715 | enum ds_qualifier qual, int clear) |
716 | { | ||
717 | struct ds_context *context; | ||
718 | unsigned long base, end; | ||
719 | int error; | ||
309 | 720 | ||
310 | if (index < 0) | 721 | context = ds_get_context(task); |
311 | return -EINVAL; | 722 | error = ds_validate_access(context, qual); |
723 | if (error < 0) | ||
724 | goto out; | ||
312 | 725 | ||
313 | if (index >= ds_get_bts_size(ds)) | 726 | base = ds_get(context->ds, qual, ds_buffer_base); |
314 | return -EINVAL; | 727 | end = ds_get(context->ds, qual, ds_absolute_maximum); |
315 | 728 | ||
316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | 729 | if (clear) |
730 | memset((void *)base, 0, end - base); | ||
317 | 731 | ||
318 | memset(out, 0, sizeof(*out)); | 732 | ds_set(context->ds, qual, ds_index, base); |
319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
320 | out->qualifier = get_info_type(bts); | ||
321 | out->variant.jiffies = get_info_data(bts); | ||
322 | } else { | ||
323 | out->qualifier = BTS_BRANCH; | ||
324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
326 | } | ||
327 | 733 | ||
328 | return sizeof(*out);; | 734 | error = 0; |
735 | out: | ||
736 | ds_put_context(context); | ||
737 | return error; | ||
329 | } | 738 | } |
330 | 739 | ||
331 | int ds_write_bts(void *ds, const struct bts_struct *in) | 740 | int ds_reset_bts(struct task_struct *task) |
332 | { | 741 | { |
333 | unsigned long bts; | 742 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); |
334 | 743 | } | |
335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
336 | return -EOPNOTSUPP; | ||
337 | |||
338 | if (ds_get_bts_size(ds) <= 0) | ||
339 | return -ENXIO; | ||
340 | 744 | ||
341 | bts = get_bts_index(ds); | 745 | int ds_reset_pebs(struct task_struct *task) |
746 | { | ||
747 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | ||
748 | } | ||
342 | 749 | ||
343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | 750 | int ds_clear_bts(struct task_struct *task) |
344 | switch (in->qualifier) { | 751 | { |
345 | case BTS_INVALID: | 752 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); |
346 | break; | 753 | } |
347 | 754 | ||
348 | case BTS_BRANCH: | 755 | int ds_clear_pebs(struct task_struct *task) |
349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | 756 | { |
350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | 757 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); |
351 | break; | 758 | } |
352 | 759 | ||
353 | case BTS_TASK_ARRIVES: | 760 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) |
354 | case BTS_TASK_DEPARTS: | 761 | { |
355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | 762 | struct ds_context *context; |
356 | set_info_type((void *)bts, in->qualifier); | 763 | int error; |
357 | set_info_data((void *)bts, in->variant.jiffies); | ||
358 | break; | ||
359 | 764 | ||
360 | default: | 765 | if (!value) |
361 | return -EINVAL; | 766 | return -EINVAL; |
362 | } | ||
363 | 767 | ||
364 | bts = bts + ds_cfg.sizeof_bts; | 768 | context = ds_get_context(task); |
365 | if (bts >= get_bts_absolute_maximum(ds)) | 769 | error = ds_validate_access(context, ds_pebs); |
366 | bts = get_bts_buffer_base(ds); | 770 | if (error < 0) |
367 | set_bts_index(ds, bts); | 771 | goto out; |
368 | 772 | ||
369 | return ds_cfg.sizeof_bts; | 773 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); |
774 | |||
775 | error = 0; | ||
776 | out: | ||
777 | ds_put_context(context); | ||
778 | return error; | ||
370 | } | 779 | } |
371 | 780 | ||
372 | unsigned long ds_debugctl_mask(void) | 781 | int ds_set_pebs_reset(struct task_struct *task, u64 value) |
373 | { | 782 | { |
374 | return ds_cfg.debugctl_mask; | 783 | struct ds_context *context; |
375 | } | 784 | int error; |
376 | 785 | ||
377 | #ifdef __i386__ | 786 | context = ds_get_context(task); |
378 | static const struct ds_configuration ds_cfg_netburst = { | 787 | error = ds_validate_access(context, ds_pebs); |
379 | .sizeof_ds = 9 * 4, | 788 | if (error < 0) |
380 | .bts_buffer_base = { 0, 4 }, | 789 | goto out; |
381 | .bts_index = { 4, 4 }, | ||
382 | .bts_absolute_maximum = { 8, 4 }, | ||
383 | .bts_interrupt_threshold = { 12, 4 }, | ||
384 | .sizeof_bts = 3 * 4, | ||
385 | .from_ip = { 0, 4 }, | ||
386 | .to_ip = { 4, 4 }, | ||
387 | .info_type = { 4, 1 }, | ||
388 | .info_data = { 8, 4 }, | ||
389 | .debugctl_mask = (1<<2)|(1<<3) | ||
390 | }; | ||
391 | 790 | ||
392 | static const struct ds_configuration ds_cfg_pentium_m = { | 791 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; |
393 | .sizeof_ds = 9 * 4, | 792 | |
394 | .bts_buffer_base = { 0, 4 }, | 793 | error = 0; |
395 | .bts_index = { 4, 4 }, | 794 | out: |
396 | .bts_absolute_maximum = { 8, 4 }, | 795 | ds_put_context(context); |
397 | .bts_interrupt_threshold = { 12, 4 }, | 796 | return error; |
398 | .sizeof_bts = 3 * 4, | 797 | } |
399 | .from_ip = { 0, 4 }, | 798 | |
400 | .to_ip = { 4, 4 }, | 799 | static const struct ds_configuration ds_cfg_var = { |
401 | .info_type = { 4, 1 }, | 800 | .sizeof_ds = sizeof(long) * 12, |
402 | .info_data = { 8, 4 }, | 801 | .sizeof_field = sizeof(long), |
403 | .debugctl_mask = (1<<6)|(1<<7) | 802 | .sizeof_rec[ds_bts] = sizeof(long) * 3, |
803 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | ||
404 | }; | 804 | }; |
405 | #endif /* _i386_ */ | 805 | static const struct ds_configuration ds_cfg_64 = { |
406 | 806 | .sizeof_ds = 8 * 12, | |
407 | static const struct ds_configuration ds_cfg_core2 = { | 807 | .sizeof_field = 8, |
408 | .sizeof_ds = 9 * 8, | 808 | .sizeof_rec[ds_bts] = 8 * 3, |
409 | .bts_buffer_base = { 0, 8 }, | 809 | .sizeof_rec[ds_pebs] = 8 * 10 |
410 | .bts_index = { 8, 8 }, | ||
411 | .bts_absolute_maximum = { 16, 8 }, | ||
412 | .bts_interrupt_threshold = { 24, 8 }, | ||
413 | .sizeof_bts = 3 * 8, | ||
414 | .from_ip = { 0, 8 }, | ||
415 | .to_ip = { 8, 8 }, | ||
416 | .info_type = { 8, 1 }, | ||
417 | .info_data = { 16, 8 }, | ||
418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
419 | }; | 810 | }; |
420 | 811 | ||
421 | static inline void | 812 | static inline void |
@@ -429,14 +820,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
429 | switch (c->x86) { | 820 | switch (c->x86) { |
430 | case 0x6: | 821 | case 0x6: |
431 | switch (c->x86_model) { | 822 | switch (c->x86_model) { |
432 | #ifdef __i386__ | ||
433 | case 0xD: | 823 | case 0xD: |
434 | case 0xE: /* Pentium M */ | 824 | case 0xE: /* Pentium M */ |
435 | ds_configure(&ds_cfg_pentium_m); | 825 | ds_configure(&ds_cfg_var); |
436 | break; | 826 | break; |
437 | #endif /* _i386_ */ | ||
438 | case 0xF: /* Core2 */ | 827 | case 0xF: /* Core2 */ |
439 | ds_configure(&ds_cfg_core2); | 828 | case 0x1C: /* Atom */ |
829 | ds_configure(&ds_cfg_64); | ||
440 | break; | 830 | break; |
441 | default: | 831 | default: |
442 | /* sorry, don't know about them */ | 832 | /* sorry, don't know about them */ |
@@ -445,13 +835,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
445 | break; | 835 | break; |
446 | case 0xF: | 836 | case 0xF: |
447 | switch (c->x86_model) { | 837 | switch (c->x86_model) { |
448 | #ifdef __i386__ | ||
449 | case 0x0: | 838 | case 0x0: |
450 | case 0x1: | 839 | case 0x1: |
451 | case 0x2: /* Netburst */ | 840 | case 0x2: /* Netburst */ |
452 | ds_configure(&ds_cfg_netburst); | 841 | ds_configure(&ds_cfg_var); |
453 | break; | 842 | break; |
454 | #endif /* _i386_ */ | ||
455 | default: | 843 | default: |
456 | /* sorry, don't know about them */ | 844 | /* sorry, don't know about them */ |
457 | break; | 845 | break; |
@@ -462,3 +850,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
462 | break; | 850 | break; |
463 | } | 851 | } |
464 | } | 852 | } |
853 | |||
854 | void ds_free(struct ds_context *context) | ||
855 | { | ||
856 | /* This is called when the task owning the parameter context | ||
857 | * is dying. There should not be any user of that context left | ||
858 | * to disturb us, anymore. */ | ||
859 | unsigned long leftovers = context->count; | ||
860 | while (leftovers--) | ||
861 | ds_put_context(context); | ||
862 | } | ||
863 | #endif /* CONFIG_X86_DS */ | ||