Diffstat (limited to 'arch/x86/kernel/ds.c')
-rw-r--r--  arch/x86/kernel/ds.c | 1153
1 file changed, 651 insertions(+), 502 deletions(-)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a121443bde..da91701a2348 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -6,14 +6,13 @@
  * precise-event based sampling (PEBS).
  *
  * It manages:
- * - per-thread and per-cpu allocation of BTS and PEBS
- * - buffer memory allocation (optional)
- * - buffer overflow handling
+ * - DS and BTS hardware configuration
+ * - buffer overflow handling (to be done)
  * - buffer access
  *
- * It assumes:
- * - get_task_struct on all parameter tasks
- * - current is allowed to trace parameter tasks
+ * It does not do:
+ * - security checking (is the caller allowed to trace the task)
+ * - buffer allocation (memory accounting)
  *
  *
  * Copyright (C) 2007-2008 Intel Corporation.
@@ -21,8 +20,6 @@
  */
 
 
-#ifdef CONFIG_X86_DS
-
 #include <asm/ds.h>
 
 #include <linux/errno.h>
@@ -30,24 +27,71 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
+#include <linux/kernel.h>
 
 
 /*
  * The configuration for a particular DS hardware implementation.
  */
 struct ds_configuration {
-	/* the size of the DS structure in bytes */
-	unsigned char sizeof_ds;
-	/* the size of one pointer-typed field in the DS structure in bytes;
-	   this covers the first 8 fields related to buffer management. */
+	/* the name of the configuration */
+	const char *name;
+	/* the size of one pointer-typed field in the DS structure and
+	   in the BTS and PEBS buffers in bytes;
+	   this covers the first 8 DS fields related to buffer management. */
 	unsigned char sizeof_field;
 	/* the size of a BTS/PEBS record in bytes */
 	unsigned char sizeof_rec[2];
+	/* a series of bit-masks to control various features indexed
+	 * by enum ds_feature */
+	unsigned long ctl[dsf_ctl_max];
 };
-static struct ds_configuration ds_cfg;
+static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+
+#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+
+#define MAX_SIZEOF_DS (12 * 8)	/* maximal size of a DS configuration */
+#define MAX_SIZEOF_BTS (3 * 8)	/* maximal size of a BTS record */
+#define DS_ALIGNMENT (1 << 3)	/* BTS and PEBS buffer alignment */
+
+#define BTS_CONTROL \
+ (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
+  ds_cfg.ctl[dsf_bts_overflow])
 
 
 /*
+ * A BTS or PEBS tracer.
+ *
+ * This holds the configuration of the tracer and serves as a handle
+ * to identify tracers.
+ */
+struct ds_tracer {
+	/* the DS context (partially) owned by this tracer */
+	struct ds_context *context;
+	/* the buffer provided on ds_request() and its size in bytes */
+	void *buffer;
+	size_t size;
+};
+
+struct bts_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct bts_trace trace;
+	/* buffer overflow notification function */
+	bts_ovfl_callback_t ovfl;
+};
+
+struct pebs_tracer {
+	/* the common DS part */
+	struct ds_tracer ds;
+	/* the trace including the DS configuration */
+	struct pebs_trace trace;
+	/* buffer overflow notification function */
+	pebs_ovfl_callback_t ovfl;
+};
+
+/*
  * Debug Store (DS) save area configuration (see Intel64 and IA32
  * Architectures Software Developer's Manual, section 18.5)
  *
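Note on the new ctl[] masks: each entry holds raw bits for the debug-control MSR, and BTS_CONTROL simply OR's every BTS-related bit together so ds_suspend_bts() can clear them in one operation. A worked example using the ds_cfg_core2 values from further down (the bit mnemonics are taken from the Intel SDM and are an assumption, not part of this patch):

	/* sketch: what BTS_CONTROL expands to for the "core 2" configuration */
	unsigned long control =
		  ((1 << 6) | (1 << 7))	/* ctl[dsf_bts]: TR and BTS enable */
		| (1 << 9)		/* ctl[dsf_bts_kernel]: BTS_OFF_OS */
		| (1 << 10)		/* ctl[dsf_bts_user]: BTS_OFF_USR */
		| 0;			/* ctl[dsf_bts_overflow]: unset here */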
@@ -111,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
 
 
 /*
- * Locking is done only for allocating BTS or PEBS resources and for
- * guarding context and buffer memory allocation.
- *
- * Most functions require the current task to own the ds context part
- * they are going to access. All the locking is done when validating
- * access to the context.
+ * Locking is done only for allocating BTS or PEBS resources.
  */
-static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);
-
-/*
- * Validate that the current task is allowed to access the BTS/PEBS
- * buffer of the parameter task.
- *
- * Returns 0, if access is granted; -Eerrno, otherwise.
- */
-static inline int ds_validate_access(struct ds_context *context,
-				     enum ds_qualifier qual)
-{
-	if (!context)
-		return -EPERM;
-
-	if (context->owner[qual] == current)
-		return 0;
-
-	return -EPERM;
-}
+static DEFINE_SPINLOCK(ds_lock);
 
 
 /*
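The lock itself is unchanged by this hunk; DEFINE_SPINLOCK() is shorthand for the open-coded initializer it replaces:

	static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock);	/* before */
	static DEFINE_SPINLOCK(ds_lock);				/* after, equivalent */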
@@ -152,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context,
  * >0  number of per-thread tracers
  * <0  number of per-cpu tracers
  *
- * The below functions to get and put tracers and to check the
- * allocation type require the ds_lock to be held by the caller.
- *
  * Tracers essentially gives the number of ds contexts for a certain
  * type of allocation.
  */
-static long tracers;
+static atomic_t tracers = ATOMIC_INIT(0);
 
 static inline void get_tracer(struct task_struct *task)
 {
-	tracers += (task ? 1 : -1);
+	if (task)
+		atomic_inc(&tracers);
+	else
+		atomic_dec(&tracers);
 }
 
 static inline void put_tracer(struct task_struct *task)
 {
-	tracers -= (task ? 1 : -1);
+	if (task)
+		atomic_dec(&tracers);
+	else
+		atomic_inc(&tracers);
 }
 
 static inline int check_tracer(struct task_struct *task)
 {
-	return (task ? (tracers >= 0) : (tracers <= 0));
+	return task ?
+		(atomic_read(&tracers) >= 0) :
+		(atomic_read(&tracers) <= 0);
 }
 
 
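The counter's sign encodes the allocation type: per-thread requests count up, per-cpu requests count down, and check_tracer() refuses to mix the two. A minimal user-space sketch of that convention (hypothetical demo code, not part of the patch):

	#include <assert.h>
	#include <stdbool.h>

	static long tracers;	/* >0: per-thread tracers, <0: per-cpu tracers */

	static bool check_tracer(bool per_thread)
	{
		return per_thread ? (tracers >= 0) : (tracers <= 0);
	}

	int main(void)
	{
		assert(check_tracer(true) && check_tracer(false)); /* none yet */
		tracers++;			/* request a per-thread tracer */
		assert(check_tracer(true));	/* more per-thread: allowed */
		assert(!check_tracer(false));	/* per-cpu: refused while mixed */
		return 0;
	}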
@@ -185,116 +211,83 @@ static inline int check_tracer(struct task_struct *task)
  *
  * Contexts are use-counted. They are allocated on first access and
  * deallocated when the last user puts the context.
- *
- * We distinguish between an allocating and a non-allocating get of a
- * context:
- * - the allocating get is used for requesting BTS/PEBS resources. It
- *   requires the caller to hold the global ds_lock.
- * - the non-allocating get is used for all other cases. A
- *   non-existing context indicates an error. It acquires and releases
- *   the ds_lock itself for obtaining the context.
- *
- * A context and its DS configuration are allocated and deallocated
- * together. A context always has a DS configuration of the
- * appropriate size.
  */
-static DEFINE_PER_CPU(struct ds_context *, system_context);
-
-#define this_system_context per_cpu(system_context, smp_processor_id())
-
-/*
- * Returns the pointer to the parameter task's context or to the
- * system-wide context, if task is NULL.
- *
- * Increases the use count of the returned context, if not NULL.
- */
-static inline struct ds_context *ds_get_context(struct task_struct *task)
-{
-	struct ds_context *context;
-
-	spin_lock(&ds_lock);
+struct ds_context {
+	/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
+	unsigned char ds[MAX_SIZEOF_DS];
+	/* the owner of the BTS and PEBS configuration, respectively */
+	struct bts_tracer *bts_master;
+	struct pebs_tracer *pebs_master;
+	/* use count */
+	unsigned long count;
+	/* a pointer to the context location inside the thread_struct
+	 * or the per_cpu context array */
+	struct ds_context **this;
+	/* a pointer to the task owning this context, or NULL, if the
+	 * context is owned by a cpu */
+	struct task_struct *task;
+};
 
-	context = (task ? task->thread.ds_ctx : this_system_context);
-	if (context)
-		context->count++;
+static DEFINE_PER_CPU(struct ds_context *, system_context_array);
 
-	spin_unlock(&ds_lock);
+#define system_context per_cpu(system_context_array, smp_processor_id())
 
-	return context;
-}
 
-/*
- * Same as ds_get_context, but allocates the context and it's DS
- * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
- */
-static inline struct ds_context *ds_alloc_context(struct task_struct *task)
+static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
 	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &this_system_context);
-	struct ds_context *context = *p_context;
+		(task ? &task->thread.ds_ctx : &system_context);
+	struct ds_context *context = NULL;
+	struct ds_context *new_context = NULL;
+	unsigned long irq;
 
-	if (!context) {
-		spin_unlock(&ds_lock);
+	/* Chances are small that we already have a context. */
+	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
+	if (!new_context)
+		return NULL;
 
-		context = kzalloc(sizeof(*context), GFP_KERNEL);
-
-		if (!context) {
-			spin_lock(&ds_lock);
-			return NULL;
-		}
+	spin_lock_irqsave(&ds_lock, irq);
 
-		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
-		if (!context->ds) {
-			kfree(context);
-			spin_lock(&ds_lock);
-			return NULL;
-		}
+	context = *p_context;
+	if (!context) {
+		context = new_context;
 
-		spin_lock(&ds_lock);
-		/*
-		 * Check for race - another CPU could have allocated
-		 * it meanwhile:
-		 */
-		if (*p_context) {
-			kfree(context->ds);
-			kfree(context);
-			return *p_context;
-		}
-
-		*p_context = context;
-
 		context->this = p_context;
 		context->task = task;
+		context->count = 0;
 
 		if (task)
 			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
 		if (!task || (task == current))
-			wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+			wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
 
-		get_tracer(task);
+		*p_context = context;
 	}
 
 	context->count++;
 
+	spin_unlock_irqrestore(&ds_lock, irq);
+
+	if (context != new_context)
+		kfree(new_context);
+
 	return context;
 }
 
-/*
- * Decreases the use count of the parameter context, if not NULL.
- * Deallocates the context, if the use count reaches zero.
- */
 static inline void ds_put_context(struct ds_context *context)
 {
+	unsigned long irq;
+
 	if (!context)
 		return;
 
-	spin_lock(&ds_lock);
+	spin_lock_irqsave(&ds_lock, irq);
 
-	if (--context->count)
-		goto out;
+	if (--context->count) {
+		spin_unlock_irqrestore(&ds_lock, irq);
+		return;
+	}
 
 	*(context->this) = NULL;
 
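The new ds_get_context() allocates before taking the lock so that kzalloc() (which may sleep) never runs under the spinlock; whoever loses the publication race simply frees its spare. The pattern in isolation, as a hedged sketch with hypothetical names (struct obj, slot_lock, get_or_alloc are illustrative, not kernel API):

	struct obj { unsigned long count; };
	static DEFINE_SPINLOCK(slot_lock);	/* hypothetical lock guarding *slot */

	static struct obj *get_or_alloc(struct obj **slot)
	{
		struct obj *spare = kzalloc(sizeof(*spare), GFP_KERNEL);
		struct obj *obj;
		unsigned long flags;

		if (!spare)
			return NULL;

		spin_lock_irqsave(&slot_lock, flags);
		obj = *slot;
		if (!obj)
			obj = *slot = spare;	/* won the race: publish ours */
		obj->count++;
		spin_unlock_irqrestore(&slot_lock, flags);

		if (obj != spare)
			kfree(spare);		/* lost the race: drop the spare */
		return obj;
	}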
@@ -304,133 +297,263 @@ static inline void ds_put_context(struct ds_context *context)
 	if (!context->task || (context->task == current))
 		wrmsrl(MSR_IA32_DS_AREA, 0);
 
-	put_tracer(context->task);
+	spin_unlock_irqrestore(&ds_lock, irq);
 
-	/* free any leftover buffers from tracers that did not
-	 * deallocate them properly. */
-	kfree(context->buffer[ds_bts]);
-	kfree(context->buffer[ds_pebs]);
-	kfree(context->ds);
 	kfree(context);
-out:
-	spin_unlock(&ds_lock);
 }
 
 
 /*
- * Handle a buffer overflow
+ * Call the tracer's callback on a buffer overflow.
  *
- * task: the task whose buffers are overflowing;
- *       NULL for a buffer overflow on the current cpu
  * context: the ds context
  * qual: the buffer type
  */
-static void ds_overflow(struct task_struct *task, struct ds_context *context,
-			enum ds_qualifier qual)
+static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
 {
-	if (!context)
-		return;
-
-	if (context->callback[qual])
-		(*context->callback[qual])(task);
-
-	/* todo: do some more overflow handling */
+	switch (qual) {
+	case ds_bts:
+		if (context->bts_master &&
+		    context->bts_master->ovfl)
+			context->bts_master->ovfl(context->bts_master);
+		break;
+	case ds_pebs:
+		if (context->pebs_master &&
+		    context->pebs_master->ovfl)
+			context->pebs_master->ovfl(context->pebs_master);
+		break;
+	}
 }
 
 
 /*
- * Allocate a non-pageable buffer of the parameter size.
- * Checks the memory and the locked memory rlimit.
+ * Write raw data into the BTS or PEBS buffer.
  *
- * Returns the buffer, if successful;
- * NULL, if out of memory or rlimit exceeded.
+ * The remainder of any partially written record is zeroed out.
  *
- * size: the requested buffer size in bytes
- * pages (out): if not NULL, contains the number of pages reserved
+ * context: the DS context
+ * qual: the buffer type
+ * record: the data to write
+ * size: the size of the data
  */
-static inline void *ds_allocate_buffer(size_t size, unsigned int *pages)
+static int ds_write(struct ds_context *context, enum ds_qualifier qual,
+		    const void *record, size_t size)
 {
-	unsigned long rlim, vm, pgsz;
-	void *buffer;
+	int bytes_written = 0;
 
-	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	if (!record)
+		return -EINVAL;
 
-	rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->total_vm + pgsz;
-	if (rlim < vm)
-		return NULL;
+	while (size) {
+		unsigned long base, index, end, write_end, int_th;
+		unsigned long write_size, adj_write_size;
 
-	rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-	vm = current->mm->locked_vm + pgsz;
-	if (rlim < vm)
-		return NULL;
+		/*
+		 * write as much as possible without producing an
+		 * overflow interrupt.
+		 *
+		 * interrupt_threshold must either be
+		 * - bigger than absolute_maximum or
+		 * - point to a record between buffer_base and absolute_maximum
+		 *
+		 * index points to a valid record.
+		 */
+		base   = ds_get(context->ds, qual, ds_buffer_base);
+		index  = ds_get(context->ds, qual, ds_index);
+		end    = ds_get(context->ds, qual, ds_absolute_maximum);
+		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
 
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		return NULL;
+		write_end = min(end, int_th);
+
+		/* if we are already beyond the interrupt threshold,
+		 * we fill the entire buffer */
+		if (write_end <= index)
+			write_end = end;
+
+		if (write_end <= index)
+			break;
+
+		write_size = min((unsigned long) size, write_end - index);
+		memcpy((void *)index, record, write_size);
+
+		record = (const char *)record + write_size;
+		size -= write_size;
+		bytes_written += write_size;
 
-	current->mm->total_vm += pgsz;
-	current->mm->locked_vm += pgsz;
+		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
+		adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-	if (pages)
-		*pages = pgsz;
+		/* zero out trailing bytes */
+		memset((char *)index + write_size, 0,
+		       adj_write_size - write_size);
+		index += adj_write_size;
 
-	return buffer;
+		if (index >= end)
+			index = base;
+		ds_set(context->ds, qual, ds_index, index);
+
+		if (index >= int_th)
+			ds_overflow(context, qual);
+	}
+
+	return bytes_written;
 }
 
-static int ds_request(struct task_struct *task, void *base, size_t size,
-		      ds_ovfl_callback_t ovfl, enum ds_qualifier qual)
+
+/*
+ * Branch Trace Store (BTS) uses the following format. Different
+ * architectures vary in the size of those fields.
+ * - source linear address
+ * - destination linear address
+ * - flags
+ *
+ * Later architectures use 64bit pointers throughout, whereas earlier
+ * architectures use 32bit pointers in 32bit mode.
+ *
+ * We compute the base address for the first 8 fields based on:
+ * - the field size stored in the DS configuration
+ * - the relative field position
+ *
+ * In order to store additional information in the BTS buffer, we use
+ * a special source address to indicate that the record requires
+ * special interpretation.
+ *
+ * Netburst indicated via a bit in the flags field whether the branch
+ * was predicted; this is ignored.
+ *
+ * We use two levels of abstraction:
+ * - the raw data level defined here
+ * - an arch-independent level defined in ds.h
+ */
+
+enum bts_field {
+	bts_from,
+	bts_to,
+	bts_flags,
+
+	bts_qual = bts_from,
+	bts_jiffies = bts_to,
+	bts_pid = bts_flags,
+
+	bts_qual_mask = (bts_qual_max - 1),
+	bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+};
+
+static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-	struct ds_context *context;
-	unsigned long buffer, adj;
-	const unsigned long alignment = (1 << 3);
-	int error = 0;
+	base += (ds_cfg.sizeof_field * field);
+	return *(unsigned long *)base;
+}
 
-	if (!ds_cfg.sizeof_ds)
-		return -EOPNOTSUPP;
+static inline void bts_set(char *base, enum bts_field field, unsigned long val)
+{
+	base += (ds_cfg.sizeof_field * field);
+	(*(unsigned long *)base) = val;
+}
 
-	/* we require some space to do alignment adjustments below */
-	if (size < (alignment + ds_cfg.sizeof_rec[qual]))
+
+/*
+ * The raw BTS data is architecture dependent.
+ *
+ * For higher-level users, we give an arch-independent view.
+ * - ds.h defines struct bts_struct
+ * - bts_read translates one raw bts record into a bts_struct
+ * - bts_write translates one bts_struct into the raw format and
+ *   writes it into the top of the parameter tracer's buffer.
+ *
+ * return: bytes read/written on success; -Eerrno, otherwise
+ */
+static int bts_read(struct bts_tracer *tracer, const void *at,
+		    struct bts_struct *out)
+{
+	if (!tracer)
 		return -EINVAL;
 
-	/* buffer overflow notification is not yet implemented */
-	if (ovfl)
-		return -EOPNOTSUPP;
+	if (at < tracer->trace.ds.begin)
+		return -EINVAL;
 
+	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
+		return -EINVAL;
 
-	spin_lock(&ds_lock);
+	memset(out, 0, sizeof(*out));
+	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
+		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
+		out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
+		out->variant.timestamp.pid = bts_get(at, bts_pid);
+	} else {
+		out->qualifier = bts_branch;
+		out->variant.lbr.from = bts_get(at, bts_from);
+		out->variant.lbr.to   = bts_get(at, bts_to);
+
+		if (!out->variant.lbr.from && !out->variant.lbr.to)
+			out->qualifier = bts_invalid;
+	}
 
-	error = -ENOMEM;
-	context = ds_alloc_context(task);
-	if (!context)
-		goto out_unlock;
+	return ds_cfg.sizeof_rec[ds_bts];
+}
 
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
+static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
+{
+	unsigned char raw[MAX_SIZEOF_BTS];
 
-	error = -EALREADY;
-	if (context->owner[qual] == current)
-		goto out_unlock;
-	error = -EPERM;
-	if (context->owner[qual] != NULL)
-		goto out_unlock;
-	context->owner[qual] = current;
+	if (!tracer)
+		return -EINVAL;
 
-	spin_unlock(&ds_lock);
+	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
+		return -EOVERFLOW;
 
+	switch (in->qualifier) {
+	case bts_invalid:
+		bts_set(raw, bts_from, 0);
+		bts_set(raw, bts_to, 0);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_branch:
+		bts_set(raw, bts_from, in->variant.lbr.from);
+		bts_set(raw, bts_to,   in->variant.lbr.to);
+		bts_set(raw, bts_flags, 0);
+		break;
+	case bts_task_arrives:
+	case bts_task_departs:
+		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
+		bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
+		bts_set(raw, bts_pid, in->variant.timestamp.pid);
+		break;
+	default:
+		return -EINVAL;
+	}
 
-	error = -ENOMEM;
-	if (!base) {
-		base = ds_allocate_buffer(size, &context->pages[qual]);
-		if (!base)
-			goto out_release;
+	return ds_write(tracer->ds.context, ds_bts, raw,
+			ds_cfg.sizeof_rec[ds_bts]);
+}
 
-		context->buffer[qual] = base;
-	}
-	error = 0;
 
-	context->callback[qual] = ovfl;
+static void ds_write_config(struct ds_context *context,
+			    struct ds_trace *cfg, enum ds_qualifier qual)
+{
+	unsigned char *ds = context->ds;
+
+	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
+	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
+	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
+	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
+}
+
+static void ds_read_config(struct ds_context *context,
+			   struct ds_trace *cfg, enum ds_qualifier qual)
+{
+	unsigned char *ds = context->ds;
+
+	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
+	cfg->top = (void *)ds_get(ds, qual, ds_index);
+	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
+	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
+}
+
+static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
+			     void *base, size_t size, size_t ith,
+			     unsigned int flags) {
+	unsigned long buffer, adj;
 
 	/* adjust the buffer address and size to meet alignment
 	 * constraints:
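The escape encoding introduced above overloads the source-address field: an all-ones pattern with the low bits cleared marks an in-band record, and the low bits carry the qualifier. A user-space sketch of the packing, with an 8-byte field width and a 4-entry qualifier space assumed for illustration:

	#include <stdio.h>

	enum { bts_from, bts_to, bts_flags };	/* relative field positions */

	int main(void)
	{
		const unsigned long sizeof_field = 8;
		unsigned char raw[3 * 8] = { 0 };
		const unsigned long qual_mask = 0x3;	/* bts_qual_max - 1 */
		const unsigned long escape = ~qual_mask; /* (unsigned long)-1 & ~mask */
		const unsigned long qualifier = 2;	/* e.g. a task-arrives marker */

		*(unsigned long *)(raw + sizeof_field * bts_from) = escape | qualifier;
		*(unsigned long *)(raw + sizeof_field * bts_to)   = 123456; /* jiffies */
		*(unsigned long *)(raw + sizeof_field * bts_flags) = 4711;  /* pid */

		/* a reader recognizes the record by masking off the qualifier */
		unsigned long from = *(unsigned long *)(raw + sizeof_field * bts_from);
		printf("escape record: %d, qualifier: %lu\n",
		       (from & ~qual_mask) == escape, from & qual_mask);
		return 0;
	}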
@@ -442,395 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 	 */
 	buffer = (unsigned long)base;
 
-	adj = ALIGN(buffer, alignment) - buffer;
+	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
 	buffer += adj;
 	size -= adj;
 
-	size /= ds_cfg.sizeof_rec[qual];
-	size *= ds_cfg.sizeof_rec[qual];
-
-	ds_set(context->ds, qual, ds_buffer_base, buffer);
-	ds_set(context->ds, qual, ds_index, buffer);
-	ds_set(context->ds, qual, ds_absolute_maximum, buffer + size);
-
-	if (ovfl) {
-		/* todo: select a suitable interrupt threshold */
-	} else
-		ds_set(context->ds, qual,
-		       ds_interrupt_threshold, buffer + size + 1);
+	trace->n = size / ds_cfg.sizeof_rec[qual];
+	trace->size = ds_cfg.sizeof_rec[qual];
 
-	/* we keep the context until ds_release */
-	return error;
-
- out_release:
-	context->owner[qual] = NULL;
-	ds_put_context(context);
-	return error;
+	size = (trace->n * trace->size);
 
- out_unlock:
-	spin_unlock(&ds_lock);
-	ds_put_context(context);
-	return error;
-}
+	trace->begin = (void *)buffer;
+	trace->top = trace->begin;
+	trace->end = (void *)(buffer + size);
+	/* The value for 'no threshold' is -1, which will set the
+	 * threshold outside of the buffer, just like we want it.
+	 */
+	trace->ith = (void *)(buffer + size - ith);
 
-int ds_request_bts(struct task_struct *task, void *base, size_t size,
-		   ds_ovfl_callback_t ovfl)
-{
-	return ds_request(task, base, size, ovfl, ds_bts);
+	trace->flags = flags;
 }
 
-int ds_request_pebs(struct task_struct *task, void *base, size_t size,
-		    ds_ovfl_callback_t ovfl)
-{
-	return ds_request(task, base, size, ovfl, ds_pebs);
-}
 
-static int ds_release(struct task_struct *task, enum ds_qualifier qual)
+static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
+		      enum ds_qualifier qual, struct task_struct *task,
+		      void *base, size_t size, size_t th, unsigned int flags)
 {
 	struct ds_context *context;
 	int error;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	error = -EINVAL;
+	if (!base)
 		goto out;
 
-	kfree(context->buffer[qual]);
-	context->buffer[qual] = NULL;
-
-	current->mm->total_vm -= context->pages[qual];
-	current->mm->locked_vm -= context->pages[qual];
-	context->pages[qual] = 0;
-	context->owner[qual] = NULL;
-
-	/*
-	 * we put the context twice:
-	 *  once for the ds_get_context
-	 *  once for the corresponding ds_request
-	 */
-	ds_put_context(context);
- out:
-	ds_put_context(context);
-	return error;
-}
+	/* we require some space to do alignment adjustments below */
+	error = -EINVAL;
+	if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+		goto out;
 
-int ds_release_bts(struct task_struct *task)
-{
-	return ds_release(task, ds_bts);
-}
+	if (th != (size_t)-1) {
+		th *= ds_cfg.sizeof_rec[qual];
 
-int ds_release_pebs(struct task_struct *task)
-{
-	return ds_release(task, ds_pebs);
-}
+		error = -EINVAL;
+		if (size <= th)
+			goto out;
+	}
 
-static int ds_get_index(struct task_struct *task, size_t *pos,
-			enum ds_qualifier qual)
-{
-	struct ds_context *context;
-	unsigned long base, index;
-	int error;
+	tracer->buffer = base;
+	tracer->size = size;
 
+	error = -ENOMEM;
 	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	if (!context)
 		goto out;
+	tracer->context = context;
 
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	index = ds_get(context->ds, qual, ds_index);
+	ds_init_ds_trace(trace, qual, base, size, th, flags);
 
-	error = ((index - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
+	error = 0;
  out:
-	ds_put_context(context);
 	return error;
 }
 
-int ds_get_bts_index(struct task_struct *task, size_t *pos)
+struct bts_tracer *ds_request_bts(struct task_struct *task,
+				  void *base, size_t size,
+				  bts_ovfl_callback_t ovfl, size_t th,
+				  unsigned int flags)
 {
-	return ds_get_index(task, pos, ds_bts);
-}
-
-int ds_get_pebs_index(struct task_struct *task, size_t *pos)
-{
-	return ds_get_index(task, pos, ds_pebs);
-}
-
-static int ds_get_end(struct task_struct *task, size_t *pos,
-		      enum ds_qualifier qual)
-{
-	struct ds_context *context;
-	unsigned long base, end;
+	struct bts_tracer *tracer;
+	unsigned long irq;
 	int error;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
+	error = -EOPNOTSUPP;
+	if (!ds_cfg.ctl[dsf_bts])
 		goto out;
 
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	end = ds_get(context->ds, qual, ds_absolute_maximum);
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
 
-	error = ((end - base) / ds_cfg.sizeof_rec[qual]);
-	if (pos)
-		*pos = error;
- out:
-	ds_put_context(context);
-	return error;
-}
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
+		goto out;
+	tracer->ovfl = ovfl;
 
-int ds_get_bts_end(struct task_struct *task, size_t *pos)
-{
-	return ds_get_end(task, pos, ds_bts);
-}
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_bts, task, base, size, th, flags);
+	if (error < 0)
+		goto out_tracer;
 
-int ds_get_pebs_end(struct task_struct *task, size_t *pos)
-{
-	return ds_get_end(task, pos, ds_pebs);
-}
 
-static int ds_access(struct task_struct *task, size_t index,
-		     const void **record, enum ds_qualifier qual)
-{
-	struct ds_context *context;
-	unsigned long base, idx;
-	int error;
+	spin_lock_irqsave(&ds_lock, irq);
 
-	if (!record)
-		return -EINVAL;
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	error = -EPERM;
+	if (tracer->ds.context->bts_master)
+		goto out_put_tracer;
+	tracer->ds.context->bts_master = tracer;
 
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	idx = base + (index * ds_cfg.sizeof_rec[qual]);
+	spin_unlock_irqrestore(&ds_lock, irq);
 
-	error = -EINVAL;
-	if (idx > ds_get(context->ds, qual, ds_absolute_maximum))
-		goto out;
 
-	*record = (const void *)idx;
-	error = ds_cfg.sizeof_rec[qual];
- out:
-	ds_put_context(context);
-	return error;
-}
+	tracer->trace.read  = bts_read;
+	tracer->trace.write = bts_write;
 
-int ds_access_bts(struct task_struct *task, size_t index, const void **record)
-{
-	return ds_access(task, index, record, ds_bts);
-}
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	ds_resume_bts(tracer);
 
-int ds_access_pebs(struct task_struct *task, size_t index, const void **record)
-{
-	return ds_access(task, index, record, ds_pebs);
+	return tracer;
+
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
+ out_tracer:
+	kfree(tracer);
+ out:
+	return ERR_PTR(error);
 }
 
-static int ds_write(struct task_struct *task, const void *record, size_t size,
-		    enum ds_qualifier qual, int force)
+struct pebs_tracer *ds_request_pebs(struct task_struct *task,
+				    void *base, size_t size,
+				    pebs_ovfl_callback_t ovfl, size_t th,
+				    unsigned int flags)
 {
-	struct ds_context *context;
+	struct pebs_tracer *tracer;
+	unsigned long irq;
 	int error;
 
-	if (!record)
-		return -EINVAL;
+	/* buffer overflow notification is not yet implemented */
+	error = -EOPNOTSUPP;
+	if (ovfl)
+		goto out;
 
-	error = -EPERM;
-	context = ds_get_context(task);
-	if (!context)
+	error = -ENOMEM;
+	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
+	if (!tracer)
 		goto out;
+	tracer->ovfl = ovfl;
 
-	if (!force) {
-		error = ds_validate_access(context, qual);
-		if (error < 0)
-			goto out;
-	}
+	error = ds_request(&tracer->ds, &tracer->trace.ds,
+			   ds_pebs, task, base, size, th, flags);
+	if (error < 0)
+		goto out_tracer;
 
-	error = 0;
-	while (size) {
-		unsigned long base, index, end, write_end, int_th;
-		unsigned long write_size, adj_write_size;
+	spin_lock_irqsave(&ds_lock, irq);
 
-		/*
-		 * write as much as possible without producing an
-		 * overflow interrupt.
-		 *
-		 * interrupt_threshold must either be
-		 * - bigger than absolute_maximum or
-		 * - point to a record between buffer_base and absolute_maximum
-		 *
-		 * index points to a valid record.
-		 */
-		base = ds_get(context->ds, qual, ds_buffer_base);
-		index = ds_get(context->ds, qual, ds_index);
-		end = ds_get(context->ds, qual, ds_absolute_maximum);
-		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+	get_tracer(task);
 
-		write_end = min(end, int_th);
+	error = -EPERM;
+	if (tracer->ds.context->pebs_master)
+		goto out_put_tracer;
+	tracer->ds.context->pebs_master = tracer;
 
-		/* if we are already beyond the interrupt threshold,
-		 * we fill the entire buffer */
-		if (write_end <= index)
-			write_end = end;
+	spin_unlock_irqrestore(&ds_lock, irq);
 
-		if (write_end <= index)
-			goto out;
+	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	ds_resume_pebs(tracer);
 
-		write_size = min((unsigned long) size, write_end - index);
-		memcpy((void *)index, record, write_size);
+	return tracer;
 
-		record = (const char *)record + write_size;
-		size -= write_size;
-		error += write_size;
+ out_put_tracer:
+	put_tracer(task);
+ out_unlock:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(tracer->ds.context);
+ out_tracer:
+	kfree(tracer);
+ out:
+	return ERR_PTR(error);
+}
 
-		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
-		adj_write_size *= ds_cfg.sizeof_rec[qual];
+void ds_release_bts(struct bts_tracer *tracer)
+{
+	if (!tracer)
+		return;
 
-		/* zero out trailing bytes */
-		memset((char *)index + write_size, 0,
-		       adj_write_size - write_size);
-		index += adj_write_size;
+	ds_suspend_bts(tracer);
 
-		if (index >= end)
-			index = base;
-		ds_set(context->ds, qual, ds_index, index);
+	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
+	tracer->ds.context->bts_master = NULL;
 
-		if (index >= int_th)
-			ds_overflow(task, context, qual);
-	}
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
 
- out:
-	ds_put_context(context);
-	return error;
+	kfree(tracer);
 }
 
-int ds_write_bts(struct task_struct *task, const void *record, size_t size)
+void ds_suspend_bts(struct bts_tracer *tracer)
 {
-	return ds_write(task, record, size, ds_bts, /* force = */ 0);
-}
+	struct task_struct *task;
 
-int ds_write_pebs(struct task_struct *task, const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 0);
-}
+	if (!tracer)
+		return;
 
-int ds_unchecked_write_bts(struct task_struct *task,
-			   const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_bts, /* force = */ 1);
-}
+	task = tracer->ds.context->task;
 
-int ds_unchecked_write_pebs(struct task_struct *task,
-			    const void *record, size_t size)
-{
-	return ds_write(task, record, size, ds_pebs, /* force = */ 1);
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+
+	if (task) {
+		task->thread.debugctlmsr &= ~BTS_CONTROL;
+
+		if (!task->thread.debugctlmsr)
+			clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 }
 
-static int ds_reset_or_clear(struct task_struct *task,
-			     enum ds_qualifier qual, int clear)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
-	struct ds_context *context;
-	unsigned long base, end;
-	int error;
+	struct task_struct *task;
+	unsigned long control;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, qual);
-	if (error < 0)
-		goto out;
+	if (!tracer)
+		return;
 
-	base = ds_get(context->ds, qual, ds_buffer_base);
-	end = ds_get(context->ds, qual, ds_absolute_maximum);
+	task = tracer->ds.context->task;
 
-	if (clear)
-		memset((void *)base, 0, end - base);
+	control = ds_cfg.ctl[dsf_bts];
+	if (!(tracer->trace.ds.flags & BTS_KERNEL))
+		control |= ds_cfg.ctl[dsf_bts_kernel];
+	if (!(tracer->trace.ds.flags & BTS_USER))
+		control |= ds_cfg.ctl[dsf_bts_user];
 
-	ds_set(context->ds, qual, ds_index, base);
+	if (task) {
+		task->thread.debugctlmsr |= control;
+		set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
+	}
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	if (!task || (task == current))
+		update_debugctlmsr(get_debugctlmsr() | control);
 }
 
-int ds_reset_bts(struct task_struct *task)
+void ds_release_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 0);
+	if (!tracer)
+		return;
+
+	ds_suspend_pebs(tracer);
+
+	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
+	tracer->ds.context->pebs_master = NULL;
+
+	put_tracer(tracer->ds.context->task);
+	ds_put_context(tracer->ds.context);
+
+	kfree(tracer);
 }
 
-int ds_reset_pebs(struct task_struct *task)
+void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0);
+
 }
 
-int ds_clear_bts(struct task_struct *task)
+void ds_resume_pebs(struct pebs_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_bts, /* clear = */ 1);
+
 }
 
-int ds_clear_pebs(struct task_struct *task)
+const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
-	return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1);
+	if (!tracer)
+		return NULL;
+
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+	return &tracer->trace;
 }
 
-int ds_get_pebs_reset(struct task_struct *task, u64 *value)
+const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
 {
-	struct ds_context *context;
-	int error;
+	if (!tracer)
+		return NULL;
 
-	if (!value)
+	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+	tracer->trace.reset_value =
+		*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+
+	return &tracer->trace;
+}
+
+int ds_reset_bts(struct bts_tracer *tracer)
+{
+	if (!tracer)
 		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	*value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8));
+	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
 }
 
-int ds_set_pebs_reset(struct task_struct *task, u64 value)
+int ds_reset_pebs(struct pebs_tracer *tracer)
 {
-	struct ds_context *context;
-	int error;
+	if (!tracer)
+		return -EINVAL;
 
-	context = ds_get_context(task);
-	error = ds_validate_access(context, ds_pebs);
-	if (error < 0)
-		goto out;
+	tracer->trace.ds.top = tracer->trace.ds.begin;
 
-	*(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value;
+	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
+	       (unsigned long)tracer->trace.ds.top);
 
-	error = 0;
- out:
-	ds_put_context(context);
-	return error;
+	return 0;
+}
+
+int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+{
+	if (!tracer)
+		return -EINVAL;
+
+	*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+
+	return 0;
 }
 
-static const struct ds_configuration ds_cfg_var = {
-	.sizeof_ds    = sizeof(long) * 12,
-	.sizeof_field = sizeof(long),
-	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
-	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
+static const struct ds_configuration ds_cfg_netburst = {
+	.name = "netburst",
+	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
+	.ctl[dsf_bts_kernel]	= (1 << 5),
+	.ctl[dsf_bts_user]	= (1 << 6),
+
+	.sizeof_field		= sizeof(long),
+	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
+#ifdef __i386__
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
+#else
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
+#endif
+};
+static const struct ds_configuration ds_cfg_pentium_m = {
+	.name = "pentium m",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+
+	.sizeof_field		= sizeof(long),
+	.sizeof_rec[ds_bts]	= sizeof(long) * 3,
+#ifdef __i386__
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 10,
+#else
+	.sizeof_rec[ds_pebs]	= sizeof(long) * 18,
+#endif
 };
-static const struct ds_configuration ds_cfg_64 = {
-	.sizeof_ds    = 8 * 12,
-	.sizeof_field = 8,
-	.sizeof_rec[ds_bts]   = 8 * 3,
-	.sizeof_rec[ds_pebs]  = 8 * 10
+static const struct ds_configuration ds_cfg_core2 = {
+	.name = "core 2",
+	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
+	.ctl[dsf_bts_kernel]	= (1 << 9),
+	.ctl[dsf_bts_user]	= (1 << 10),
+
+	.sizeof_field		= 8,
+	.sizeof_rec[ds_bts]	= 8 * 3,
+	.sizeof_rec[ds_pebs]	= 8 * 18,
 };
 
-static inline void
+static void
 ds_configure(const struct ds_configuration *cfg)
 {
+	memset(&ds_cfg, 0, sizeof(ds_cfg));
 	ds_cfg = *cfg;
+
+	printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+
+	if (!cpu_has_bts) {
+		ds_cfg.ctl[dsf_bts] = 0;
+		printk(KERN_INFO "[ds] bts not available\n");
+	}
+	if (!cpu_has_pebs)
+		printk(KERN_INFO "[ds] pebs not available\n");
+
+	WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
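For reference, this is how an in-kernel client would drive the new handle-based interface end to end. A hypothetical sketch only: the buffer size, flag combination, and error handling are illustrative, while ds_request_bts(), ds_read_bts(), ds_release_bts(), and the BTS_* flags come from this patch:

	static struct bts_tracer *tracer;
	static unsigned char bts_buffer[4096];	/* caller-provided, per the patch */

	static int start_branch_trace(struct task_struct *task)
	{
		tracer = ds_request_bts(task, bts_buffer, sizeof(bts_buffer),
					NULL /* ovfl: not yet implemented */,
					(size_t)-1 /* th: no interrupt threshold */,
					BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS);
		if (IS_ERR(tracer))
			return PTR_ERR(tracer);
		return 0;
	}

	static void stop_branch_trace(void)
	{
		const struct bts_trace *trace = ds_read_bts(tracer);
		/* iterate trace->ds.begin .. trace->ds.top in steps of
		 * trace->ds.size, decoding records via trace->read() */
		ds_release_bts(tracer);
	}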
@@ -838,16 +949,15 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	switch (c->x86) {
 	case 0x6:
 		switch (c->x86_model) {
+		case 0 ... 0xC:
+			/* sorry, don't know about them */
+			break;
 		case 0xD:
 		case 0xE: /* Pentium M */
-			ds_configure(&ds_cfg_var);
-			break;
-		case 0xF: /* Core2 */
-		case 0x1C: /* Atom */
-			ds_configure(&ds_cfg_64);
+			ds_configure(&ds_cfg_pentium_m);
 			break;
-		default:
-			/* sorry, don't know about them */
+		default: /* Core2, Atom, ... */
+			ds_configure(&ds_cfg_core2);
 			break;
 		}
 		break;
@@ -856,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 		case 0x0:
 		case 0x1:
 		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_var);
+			ds_configure(&ds_cfg_netburst);
 			break;
 		default:
 			/* sorry, don't know about them */
@@ -869,13 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 	}
 }
 
-void ds_free(struct ds_context *context)
+/*
+ * Change the DS configuration from tracing prev to tracing next.
+ */
+void ds_switch_to(struct task_struct *prev, struct task_struct *next)
+{
+	struct ds_context *prev_ctx = prev->thread.ds_ctx;
+	struct ds_context *next_ctx = next->thread.ds_ctx;
+
+	if (prev_ctx) {
+		update_debugctlmsr(0);
+
+		if (prev_ctx->bts_master &&
+		    (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_departs,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = prev->pid
+			};
+			bts_write(prev_ctx->bts_master, &ts);
+		}
+	}
+
+	if (next_ctx) {
+		if (next_ctx->bts_master &&
+		    (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
+			struct bts_struct ts = {
+				.qualifier = bts_task_arrives,
+				.variant.timestamp.jiffies = jiffies_64,
+				.variant.timestamp.pid = next->pid
+			};
+			bts_write(next_ctx->bts_master, &ts);
+		}
+
+		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
+	}
+
+	update_debugctlmsr(next->thread.debugctlmsr);
+}
+
+void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+{
+	clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
+	tsk->thread.ds_ctx = NULL;
+}
+
+void ds_exit_thread(struct task_struct *tsk)
 {
-	/* This is called when the task owning the parameter context
-	 * is dying. There should not be any user of that context left
-	 * to disturb us, anymore. */
-	unsigned long leftovers = context->count;
-	while (leftovers--)
-		ds_put_context(context);
+	WARN_ON(tsk->thread.ds_ctx);
 }
-#endif /* CONFIG_X86_DS */
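ds_switch_to() is meant to be driven from the context-switch path; the wiring is not shown in this file, but a plausible call site would key off the existing TIF flags, along these lines (assumed placement, e.g. in __switch_to_xtra(), not part of this patch):

	if (test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR) ||
	    test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR))
		ds_switch_to(prev_p, next_p);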