diff options
| -rw-r--r-- | arch/x86/Kconfig.cpu | 18 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/intel.c | 3 | ||||
| -rw-r--r-- | arch/x86/kernel/ds.c | 954 | ||||
| -rw-r--r-- | arch/x86/kernel/process_32.c | 50 | ||||
| -rw-r--r-- | arch/x86/kernel/process_64.c | 38 | ||||
| -rw-r--r-- | arch/x86/kernel/ptrace.c | 444 | ||||
| -rw-r--r-- | include/asm-x86/ds.h | 258 | ||||
| -rw-r--r-- | include/asm-x86/processor.h | 12 | ||||
| -rw-r--r-- | include/asm-x86/ptrace-abi.h | 14 | ||||
| -rw-r--r-- | include/asm-x86/ptrace.h | 38 |
10 files changed, 1301 insertions, 528 deletions
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b225219c448c..60a85768cfcb 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
| @@ -418,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY | |||
| 418 | config X86_DEBUGCTLMSR | 418 | config X86_DEBUGCTLMSR |
| 419 | def_bool y | 419 | def_bool y |
| 420 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) | 420 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) |
| 421 | |||
| 422 | config X86_DS | ||
| 423 | bool "Debug Store support" | ||
| 424 | default y | ||
| 425 | help | ||
| 426 | Add support for Debug Store. | ||
| 427 | This allows the kernel to provide a memory buffer to the hardware | ||
| 428 | to store various profiling and tracing events. | ||
| 429 | |||
| 430 | config X86_PTRACE_BTS | ||
| 431 | bool "ptrace interface to Branch Trace Store" | ||
| 432 | default y | ||
| 433 | depends on (X86_DS && X86_DEBUGCTLMSR) | ||
| 434 | help | ||
| 435 | Add a ptrace interface to allow collecting an execution trace | ||
| 436 | of the traced task. | ||
| 437 | This collects control flow changes in a (cyclic) buffer and allows | ||
| 438 | debuggers to fill in the gaps and show an execution trace of the debuggee. | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f2569b8f8..f113ef4595f6 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
| @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 222 | set_cpu_cap(c, X86_FEATURE_BTS); | 222 | set_cpu_cap(c, X86_FEATURE_BTS); |
| 223 | if (!(l1 & (1<<12))) | 223 | if (!(l1 & (1<<12))) |
| 224 | set_cpu_cap(c, X86_FEATURE_PEBS); | 224 | set_cpu_cap(c, X86_FEATURE_PEBS); |
| 225 | ds_init_intel(c); | ||
| 225 | } | 226 | } |
| 226 | 227 | ||
| 227 | if (cpu_has_bts) | 228 | if (cpu_has_bts) |
| 228 | ds_init_intel(c); | 229 | ptrace_bts_init_intel(c); |
| 229 | 230 | ||
| 230 | /* | 231 | /* |
| 231 | * See if we have a good local APIC by checking for buggy Pentia, | 232 | * See if we have a good local APIC by checking for buggy Pentia, |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8ec48d..2b69994fd3a8 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
| @@ -2,26 +2,49 @@ | |||
| 2 | * Debug Store support | 2 | * Debug Store support |
| 3 | * | 3 | * |
| 4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
| 5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
| 6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
| 7 | * | 7 | * |
| 8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
| 9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
| 10 | * - buffer memory allocation (optional) | ||
| 11 | * - buffer overflow handling | ||
| 12 | * - buffer access | ||
| 10 | * | 13 | * |
| 14 | * It assumes: | ||
| 15 | * - get_task_struct on all parameter tasks | ||
| 16 | * - current is allowed to trace parameter tasks | ||
| 11 | * | 17 | * |
| 12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
| 13 | * trace store, BTS) is supported. | ||
| 14 | * | 18 | * |
| 15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
| 16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
| 17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 18 | */ | 21 | */ |
| 19 | 22 | ||
| 23 | |||
| 24 | #ifdef CONFIG_X86_DS | ||
| 25 | |||
| 20 | #include <asm/ds.h> | 26 | #include <asm/ds.h> |
| 21 | 27 | ||
| 22 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
| 23 | #include <linux/string.h> | 29 | #include <linux/string.h> |
| 24 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
| 31 | #include <linux/sched.h> | ||
| 32 | #include <linux/mm.h> | ||
| 33 | |||
| 34 | |||
| 35 | /* | ||
| 36 | * The configuration for a particular DS hardware implementation. | ||
| 37 | */ | ||
| 38 | struct ds_configuration { | ||
| 39 | /* the size of the DS structure in bytes */ | ||
| 40 | unsigned char sizeof_ds; | ||
| 41 | /* the size of one pointer-typed field in the DS structure in bytes; | ||
| 42 | this covers the first 8 fields related to buffer management. */ | ||
| 43 | unsigned char sizeof_field; | ||
| 44 | /* the size of a BTS/PEBS record in bytes */ | ||
| 45 | unsigned char sizeof_rec[2]; | ||
| 46 | }; | ||
| 47 | static struct ds_configuration ds_cfg; | ||
| 25 | 48 | ||
| 26 | 49 | ||
| 27 | /* | 50 | /* |
| @@ -44,378 +67,747 @@ | |||
| 44 | * (interrupt occurs when write pointer passes interrupt pointer) | 67 | * (interrupt occurs when write pointer passes interrupt pointer) |
| 45 | * - value to which counter is reset following counter overflow | 68 | * - value to which counter is reset following counter overflow |
| 46 | * | 69 | * |
| 47 | * On later architectures, the last branch recording hardware uses | 70 | * Later architectures use 64bit pointers throughout, whereas earlier |
| 48 | * 64bit pointers even in 32bit mode. | 71 | * architectures use 32bit pointers in 32bit mode. |
| 49 | * | ||
| 50 | * | ||
| 51 | * Branch Trace Store (BTS) records store information about control | ||
| 52 | * flow changes. They at least provide the following information: | ||
| 53 | * - source linear address | ||
| 54 | * - destination linear address | ||
| 55 | * | 72 | * |
| 56 | * Netburst supported a predicated bit that had been dropped in later | ||
| 57 | * architectures. We do not support it. | ||
| 58 | * | 73 | * |
| 74 | * We compute the base address for the first 8 fields based on: | ||
| 75 | * - the field size stored in the DS configuration | ||
| 76 | * - the relative field position | ||
| 77 | * - an offset giving the start of the respective region | ||
| 59 | * | 78 | * |
| 60 | * In order to abstract from the actual DS and BTS layout, we describe | 79 | * This offset is further used to index various arrays holding |
| 61 | * the access to the relevant fields. | 80 | * information for BTS and PEBS at the respective index. |
| 62 | * Thanks to Andi Kleen for proposing this design. | ||
| 63 | * | 81 | * |
| 64 | * The implementation, however, is not as general as it might seem. In | 82 | * On later 32bit processors, we only access the lower 32bit of the |
| 65 | * order to stay somewhat simple and efficient, we assume an | 83 | * 64bit pointer fields. The upper halves will be zeroed out. |
| 66 | * underlying unsigned type (mostly a pointer type) and we expect the | ||
| 67 | * field to be at least as big as that type. | ||
| 68 | */ | 84 | */ |
| 69 | 85 | ||
| 70 | /* | 86 | enum ds_field { |
| 71 | * A special from_ip address to indicate that the BTS record is an | 87 | ds_buffer_base = 0, |
| 72 | * info record that needs to be interpreted or skipped. | 88 | ds_index, |
| 73 | */ | 89 | ds_absolute_maximum, |
| 74 | #define BTS_ESCAPE_ADDRESS (-1) | 90 | ds_interrupt_threshold, |
| 91 | }; | ||
| 75 | 92 | ||
| 76 | /* | 93 | enum ds_qualifier { |
| 77 | * A field access descriptor | 94 | ds_bts = 0, |
| 78 | */ | 95 | ds_pebs |
| 79 | struct access_desc { | ||
| 80 | unsigned char offset; | ||
| 81 | unsigned char size; | ||
| 82 | }; | 96 | }; |
| 83 | 97 | ||
| 98 | static inline unsigned long ds_get(const unsigned char *base, | ||
| 99 | enum ds_qualifier qual, enum ds_field field) | ||
| 100 | { | ||
| 101 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 102 | return *(unsigned long *)base; | ||
| 103 | } | ||
| 104 | |||
| 105 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | ||
| 106 | enum ds_field field, unsigned long value) | ||
| 107 | { | ||
| 108 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | ||
| 109 | (*(unsigned long *)base) = value; | ||
| 110 | } | ||
| 111 | |||
| 112 | |||
| 84 | /* | 113 | /* |
| 85 | * The configuration for a particular DS/BTS hardware implementation. | 114 | * Locking is done only for allocating BTS or PEBS resources and for |
| 115 | * guarding context and buffer memory allocation. | ||
| 116 | * | ||
| 117 | * Most functions require the current task to own the ds context part | ||
| 118 | * they are going to access. All the locking is done when validating | ||
| 119 | * access to the context. | ||
| 86 | */ | 120 | */ |
| 87 | struct ds_configuration { | 121 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); |
| 88 | /* the DS configuration */ | ||
| 89 | unsigned char sizeof_ds; | ||
| 90 | struct access_desc bts_buffer_base; | ||
| 91 | struct access_desc bts_index; | ||
| 92 | struct access_desc bts_absolute_maximum; | ||
| 93 | struct access_desc bts_interrupt_threshold; | ||
| 94 | /* the BTS configuration */ | ||
| 95 | unsigned char sizeof_bts; | ||
| 96 | struct access_desc from_ip; | ||
| 97 | struct access_desc to_ip; | ||
| 98 | /* BTS variants used to store additional information like | ||
| 99 | timestamps */ | ||
| 100 | struct access_desc info_type; | ||
| 101 | struct access_desc info_data; | ||
| 102 | unsigned long debugctl_mask; | ||
| 103 | }; | ||
| 104 | 122 | ||
| 105 | /* | 123 | /* |
| 106 | * The global configuration used by the below accessor functions | 124 | * Validate that the current task is allowed to access the BTS/PEBS |
| 125 | * buffer of the parameter task. | ||
| 126 | * | ||
| 127 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
| 107 | */ | 128 | */ |
| 108 | static struct ds_configuration ds_cfg; | 129 | static inline int ds_validate_access(struct ds_context *context, |
| 130 | enum ds_qualifier qual) | ||
| 131 | { | ||
| 132 | if (!context) | ||
| 133 | return -EPERM; | ||
| 134 | |||
| 135 | if (context->owner[qual] == current) | ||
| 136 | return 0; | ||
| 137 | |||
| 138 | return -EPERM; | ||
| 139 | } | ||
| 140 | |||
| 109 | 141 | ||
| 110 | /* | 142 | /* |
| 111 | * Accessor functions for some DS and BTS fields using the above | 143 | * We either support (system-wide) per-cpu or per-thread allocation. |
| 112 | * global ptrace_bts_cfg. | 144 | * We distinguish the two based on the task_struct pointer, where a |
| 145 | * NULL pointer indicates per-cpu allocation for the current cpu. | ||
| 146 | * | ||
| 147 | * Allocations are use-counted. As soon as resources are allocated, | ||
| 148 | * further allocations must be of the same type (per-cpu or | ||
| 149 | * per-thread). We model this by counting allocations (i.e. the number | ||
| 150 | * of tracers of a certain type) for one type negatively: | ||
| 151 | * =0 no tracers | ||
| 152 | * >0 number of per-thread tracers | ||
| 153 | * <0 number of per-cpu tracers | ||
| 154 | * | ||
| 155 | * The below functions to get and put tracers and to check the | ||
| 156 | * allocation type require the ds_lock to be held by the caller. | ||
| 157 | * | ||
| 158 | * Tracers essentially gives the number of ds contexts for a certain | ||
| 159 | * type of allocation. | ||
| 113 | */ | 160 | */ |
| 114 | static inline unsigned long get_bts_buffer_base(char *base) | 161 | static long tracers; |
| 162 | |||
| 163 | static inline void get_tracer(struct task_struct *task) | ||
| 115 | { | 164 | { |
| 116 | return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); | 165 | tracers += (task ? 1 : -1); |
| 117 | } | 166 | } |
| 118 | static inline void set_bts_buffer_base(char *base, unsigned long value) | 167 | |
| 168 | static inline void put_tracer(struct task_struct *task) | ||
| 119 | { | 169 | { |
| 120 | (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; | 170 | tracers -= (task ? 1 : -1); |
| 121 | } | 171 | } |
| 122 | static inline unsigned long get_bts_index(char *base) | 172 | |
| 173 | static inline int check_tracer(struct task_struct *task) | ||
| 123 | { | 174 | { |
| 124 | return *(unsigned long *)(base + ds_cfg.bts_index.offset); | 175 | return (task ? (tracers >= 0) : (tracers <= 0)); |
| 125 | } | 176 | } |
| 126 | static inline void set_bts_index(char *base, unsigned long value) | 177 | |
| 178 | |||
| 179 | /* | ||
| 180 | * The DS context is either attached to a thread or to a cpu: | ||
| 181 | * - in the former case, the thread_struct contains a pointer to the | ||
| 182 | * attached context. | ||
| 183 | * - in the latter case, we use a static array of per-cpu context | ||
| 184 | * pointers. | ||
| 185 | * | ||
| 186 | * Contexts are use-counted. They are allocated on first access and | ||
| 187 | * deallocated when the last user puts the context. | ||
| 188 | * | ||
| 189 | * We distinguish between an allocating and a non-allocating get of a | ||
| 190 | * context: | ||
| 191 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
| 192 | * requires the caller to hold the global ds_lock. | ||
| 193 | * - the non-allocating get is used for all other cases. A | ||
| 194 | * non-existing context indicates an error. It acquires and releases | ||
| 195 | * the ds_lock itself for obtaining the context. | ||
| 196 | * | ||
| 197 | * A context and its DS configuration are allocated and deallocated | ||
| 198 | * together. A context always has a DS configuration of the | ||
| 199 | * appropriate size. | ||
| 200 | */ | ||
| 201 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
| 202 | |||
| 203 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Returns the pointer to the parameter task's context or to the | ||
| 207 | * system-wide context, if task is NULL. | ||
| 208 | * | ||
| 209 | * Increases the use count of the returned context, if not NULL. | ||
| 210 | */ | ||
| 211 | static inline struct ds_context *ds_get_context(struct task_struct *task) | ||
| 127 | { | 212 | { |
| 128 | (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; | 213 | struct ds_context *context; |
| 214 | |||
| 215 | spin_lock(&ds_lock); | ||
| 216 | |||
| 217 | context = (task ? task->thread.ds_ctx : this_system_context); | ||
| 218 | if (context) | ||
| 219 | context->count++; | ||
| 220 | |||
| 221 | spin_unlock(&ds_lock); | ||
| 222 | |||
| 223 | return context; | ||
| 129 | } | 224 | } |
| 130 | static inline unsigned long get_bts_absolute_maximum(char *base) | 225 | |
| 226 | /* | ||
| 227 | * Same as ds_get_context, but allocates the context and its DS | ||
| 228 | * structure, if necessary; returns NULL, if out of memory. | ||
| 229 | * | ||
| 230 | * pre: requires ds_lock to be held | ||
| 231 | */ | ||
| 232 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
| 131 | { | 233 | { |
| 132 | return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); | 234 | struct ds_context **p_context = |
| 235 | (task ? &task->thread.ds_ctx : &this_system_context); | ||
| 236 | struct ds_context *context = *p_context; | ||
| 237 | |||
| 238 | if (!context) { | ||
| 239 | context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
| 240 | |||
| 241 | if (!context) | ||
| 242 | return NULL; | ||
| 243 | |||
| 244 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
| 245 | if (!context->ds) { | ||
| 246 | kfree(context); | ||
| 247 | return NULL; | ||
| 248 | } | ||
| 249 | |||
| 250 | *p_context = context; | ||
| 251 | |||
| 252 | context->this = p_context; | ||
| 253 | context->task = task; | ||
| 254 | |||
| 255 | if (task) | ||
| 256 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
| 257 | |||
| 258 | if (!task || (task == current)) | ||
| 259 | wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); | ||
| 260 | |||
| 261 | get_tracer(task); | ||
| 262 | } | ||
| 263 | |||
| 264 | context->count++; | ||
| 265 | |||
| 266 | return context; | ||
| 133 | } | 267 | } |
| 134 | static inline void set_bts_absolute_maximum(char *base, unsigned long value) | 268 | |
| 269 | /* | ||
| 270 | * Decreases the use count of the parameter context, if not NULL. | ||
| 271 | * Deallocates the context, if the use count reaches zero. | ||
| 272 | */ | ||
| 273 | static inline void ds_put_context(struct ds_context *context) | ||
| 135 | { | 274 | { |
| 136 | (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; | 275 | if (!context) |
| 276 | return; | ||
| 277 | |||
| 278 | spin_lock(&ds_lock); | ||
| 279 | |||
| 280 | if (--context->count) | ||
| 281 | goto out; | ||
| 282 | |||
| 283 | *(context->this) = NULL; | ||
| 284 | |||
| 285 | if (context->task) | ||
| 286 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
| 287 | |||
| 288 | if (!context->task || (context->task == current)) | ||
| 289 | wrmsrl(MSR_IA32_DS_AREA, 0); | ||
| 290 | |||
| 291 | put_tracer(context->task); | ||
| 292 | |||
| 293 | /* free any leftover buffers from tracers that did not | ||
| 294 | * deallocate them properly. */ | ||
| 295 | kfree(context->buffer[ds_bts]); | ||
| 296 | kfree(context->buffer[ds_pebs]); | ||
| 297 | kfree(context->ds); | ||
| 298 | kfree(context); | ||
| 299 | out: | ||
| 300 | spin_unlock(&ds_lock); | ||
| 137 | } | 301 | } |
| 138 | static inline unsigned long get_bts_interrupt_threshold(char *base) | 302 | |
| 303 | |||
| 304 | /* | ||
| 305 | * Handle a buffer overflow | ||
| 306 | * | ||
| 307 | * task: the task whose buffers are overflowing; | ||
| 308 | * NULL for a buffer overflow on the current cpu | ||
| 309 | * context: the ds context | ||
| 310 | * qual: the buffer type | ||
| 311 | */ | ||
| 312 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | ||
| 313 | enum ds_qualifier qual) | ||
| 139 | { | 314 | { |
| 140 | return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); | 315 | if (!context) |
| 316 | return; | ||
| 317 | |||
| 318 | if (context->callback[qual]) | ||
| 319 | (*context->callback[qual])(task); | ||
| 320 | |||
| 321 | /* todo: do some more overflow handling */ | ||
| 141 | } | 322 | } |
| 142 | static inline void set_bts_interrupt_threshold(char *base, unsigned long value) | 323 | |
| 324 | |||
| 325 | /* | ||
| 326 | * Allocate a non-pageable buffer of the parameter size. | ||
| 327 | * Checks the memory and the locked memory rlimit. | ||
| 328 | * | ||
| 329 | * Returns the buffer, if successful; | ||
| 330 | * NULL, if out of memory or rlimit exceeded. | ||
| 331 | * | ||
| 332 | * size: the requested buffer size in bytes | ||
| 333 | * pages (out): if not NULL, contains the number of pages reserved | ||
| 334 | */ | ||
| 335 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | ||
| 143 | { | 336 | { |
| 144 | (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; | 337 | unsigned long rlim, vm, pgsz; |
| 338 | void *buffer; | ||
| 339 | |||
| 340 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | ||
| 341 | |||
| 342 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 343 | vm = current->mm->total_vm + pgsz; | ||
| 344 | if (rlim < vm) | ||
| 345 | return NULL; | ||
| 346 | |||
| 347 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | ||
| 348 | vm = current->mm->locked_vm + pgsz; | ||
| 349 | if (rlim < vm) | ||
| 350 | return NULL; | ||
| 351 | |||
| 352 | buffer = kzalloc(size, GFP_KERNEL); | ||
| 353 | if (!buffer) | ||
| 354 | return NULL; | ||
| 355 | |||
| 356 | current->mm->total_vm += pgsz; | ||
| 357 | current->mm->locked_vm += pgsz; | ||
| 358 | |||
| 359 | if (pages) | ||
| 360 | *pages = pgsz; | ||
| 361 | |||
| 362 | return buffer; | ||
| 145 | } | 363 | } |
| 146 | static inline unsigned long get_from_ip(char *base) | 364 | |
| 365 | static int ds_request(struct task_struct *task, void *base, size_t size, | ||
| 366 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | ||
| 147 | { | 367 | { |
| 148 | return *(unsigned long *)(base + ds_cfg.from_ip.offset); | 368 | struct ds_context *context; |
| 369 | unsigned long buffer, adj; | ||
| 370 | const unsigned long alignment = (1 << 3); | ||
| 371 | int error = 0; | ||
| 372 | |||
| 373 | if (!ds_cfg.sizeof_ds) | ||
| 374 | return -EOPNOTSUPP; | ||
| 375 | |||
| 376 | /* we require some space to do alignment adjustments below */ | ||
| 377 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | ||
| 378 | return -EINVAL; | ||
| 379 | |||
| 380 | /* buffer overflow notification is not yet implemented */ | ||
| 381 | if (ovfl) | ||
| 382 | return -EOPNOTSUPP; | ||
| 383 | |||
| 384 | |||
| 385 | spin_lock(&ds_lock); | ||
| 386 | |||
| 387 | if (!check_tracer(task)) | ||
| 388 | return -EPERM; | ||
| 389 | |||
| 390 | error = -ENOMEM; | ||
| 391 | context = ds_alloc_context(task); | ||
| 392 | if (!context) | ||
| 393 | goto out_unlock; | ||
| 394 | |||
| 395 | error = -EALREADY; | ||
| 396 | if (context->owner[qual] == current) | ||
| 397 | goto out_unlock; | ||
| 398 | error = -EPERM; | ||
| 399 | if (context->owner[qual] != NULL) | ||
| 400 | goto out_unlock; | ||
| 401 | context->owner[qual] = current; | ||
| 402 | |||
| 403 | spin_unlock(&ds_lock); | ||
| 404 | |||
| 405 | |||
| 406 | error = -ENOMEM; | ||
| 407 | if (!base) { | ||
| 408 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
| 409 | if (!base) | ||
| 410 | goto out_release; | ||
| 411 | |||
| 412 | context->buffer[qual] = base; | ||
| 413 | } | ||
| 414 | error = 0; | ||
| 415 | |||
| 416 | context->callback[qual] = ovfl; | ||
| 417 | |||
| 418 | /* adjust the buffer address and size to meet alignment | ||
| 419 | * constraints: | ||
| 420 | * - buffer is double-word aligned | ||
| 421 | * - size is multiple of record size | ||
| 422 | * | ||
| 423 | * We checked the size at the very beginning; we have enough | ||
| 424 | * space to do the adjustment. | ||
| 425 | */ | ||
| 426 | buffer = (unsigned long)base; | ||
| 427 | |||
| 428 | adj = ALIGN(buffer, alignment) - buffer; | ||
| 429 | buffer += adj; | ||
| 430 | size -= adj; | ||
| 431 | |||
| 432 | size /= ds_cfg.sizeof_rec[qual]; | ||
| 433 | size *= ds_cfg.sizeof_rec[qual]; | ||
| 434 | |||
| 435 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
| 436 | ds_set(context->ds, qual, ds_index, buffer); | ||
| 437 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
| 438 | |||
| 439 | if (ovfl) { | ||
| 440 | /* todo: select a suitable interrupt threshold */ | ||
| 441 | } else | ||
| 442 | ds_set(context->ds, qual, | ||
| 443 | ds_interrupt_threshold, buffer + size + 1); | ||
| 444 | |||
| 445 | /* we keep the context until ds_release */ | ||
| 446 | return error; | ||
| 447 | |||
| 448 | out_release: | ||
| 449 | context->owner[qual] = NULL; | ||
| 450 | ds_put_context(context); | ||
| 451 | return error; | ||
| 452 | |||
| 453 | out_unlock: | ||
| 454 | spin_unlock(&ds_lock); | ||
| 455 | ds_put_context(context); | ||
| 456 | return error; | ||
| 149 | } | 457 | } |
| 150 | static inline void set_from_ip(char *base, unsigned long value) | 458 | |
| 459 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
| 460 | ds_ovfl_callback_t ovfl) | ||
| 151 | { | 461 | { |
| 152 | (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; | 462 | return ds_request(task, base, size, ovfl, ds_bts); |
| 153 | } | 463 | } |
| 154 | static inline unsigned long get_to_ip(char *base) | 464 | |
| 465 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | ||
| 466 | ds_ovfl_callback_t ovfl) | ||
| 155 | { | 467 | { |
| 156 | return *(unsigned long *)(base + ds_cfg.to_ip.offset); | 468 | return ds_request(task, base, size, ovfl, ds_pebs); |
| 157 | } | 469 | } |
| 158 | static inline void set_to_ip(char *base, unsigned long value) | 470 | |
| 471 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
| 159 | { | 472 | { |
| 160 | (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; | 473 | struct ds_context *context; |
| 474 | int error; | ||
| 475 | |||
| 476 | context = ds_get_context(task); | ||
| 477 | error = ds_validate_access(context, qual); | ||
| 478 | if (error < 0) | ||
| 479 | goto out; | ||
| 480 | |||
| 481 | kfree(context->buffer[qual]); | ||
| 482 | context->buffer[qual] = NULL; | ||
| 483 | |||
| 484 | current->mm->total_vm -= context->pages[qual]; | ||
| 485 | current->mm->locked_vm -= context->pages[qual]; | ||
| 486 | context->pages[qual] = 0; | ||
| 487 | context->owner[qual] = NULL; | ||
| 488 | |||
| 489 | /* | ||
| 490 | * we put the context twice: | ||
| 491 | * once for the ds_get_context | ||
| 492 | * once for the corresponding ds_request | ||
| 493 | */ | ||
| 494 | ds_put_context(context); | ||
| 495 | out: | ||
| 496 | ds_put_context(context); | ||
| 497 | return error; | ||
| 161 | } | 498 | } |
| 162 | static inline unsigned char get_info_type(char *base) | 499 | |
| 500 | int ds_release_bts(struct task_struct *task) | ||
| 163 | { | 501 | { |
| 164 | return *(unsigned char *)(base + ds_cfg.info_type.offset); | 502 | return ds_release(task, ds_bts); |
| 165 | } | 503 | } |
| 166 | static inline void set_info_type(char *base, unsigned char value) | 504 | |
| 505 | int ds_release_pebs(struct task_struct *task) | ||
| 167 | { | 506 | { |
| 168 | (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; | 507 | return ds_release(task, ds_pebs); |
| 169 | } | 508 | } |
| 170 | static inline unsigned long get_info_data(char *base) | 509 | |
| 510 | static int ds_get_index(struct task_struct *task, size_t *pos, | ||
| 511 | enum ds_qualifier qual) | ||
| 171 | { | 512 | { |
| 172 | return *(unsigned long *)(base + ds_cfg.info_data.offset); | 513 | struct ds_context *context; |
| 514 | unsigned long base, index; | ||
| 515 | int error; | ||
| 516 | |||
| 517 | context = ds_get_context(task); | ||
| 518 | error = ds_validate_access(context, qual); | ||
| 519 | if (error < 0) | ||
| 520 | goto out; | ||
| 521 | |||
| 522 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 523 | index = ds_get(context->ds, qual, ds_index); | ||
| 524 | |||
| 525 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | ||
| 526 | if (pos) | ||
| 527 | *pos = error; | ||
| 528 | out: | ||
| 529 | ds_put_context(context); | ||
| 530 | return error; | ||
| 173 | } | 531 | } |
| 174 | static inline void set_info_data(char *base, unsigned long value) | 532 | |
| 533 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | ||
| 175 | { | 534 | { |
| 176 | (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; | 535 | return ds_get_index(task, pos, ds_bts); |
| 177 | } | 536 | } |
| 178 | 537 | ||
| 538 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
| 539 | { | ||
| 540 | return ds_get_index(task, pos, ds_pebs); | ||
| 541 | } | ||
| 179 | 542 | ||
| 180 | int ds_allocate(void **dsp, size_t bts_size_in_bytes) | 543 | static int ds_get_end(struct task_struct *task, size_t *pos, |
| 544 | enum ds_qualifier qual) | ||
| 181 | { | 545 | { |
| 182 | size_t bts_size_in_records; | 546 | struct ds_context *context; |
| 183 | unsigned long bts; | 547 | unsigned long base, end; |
| 184 | void *ds; | 548 | int error; |
| 549 | |||
| 550 | context = ds_get_context(task); | ||
| 551 | error = ds_validate_access(context, qual); | ||
| 552 | if (error < 0) | ||
| 553 | goto out; | ||
| 554 | |||
| 555 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 556 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 557 | |||
| 558 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | ||
| 559 | if (pos) | ||
| 560 | *pos = error; | ||
| 561 | out: | ||
| 562 | ds_put_context(context); | ||
| 563 | return error; | ||
| 564 | } | ||
| 185 | 565 | ||
| 186 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 566 | int ds_get_bts_end(struct task_struct *task, size_t *pos) |
| 187 | return -EOPNOTSUPP; | 567 | { |
| 568 | return ds_get_end(task, pos, ds_bts); | ||
| 569 | } | ||
| 188 | 570 | ||
| 189 | if (bts_size_in_bytes < 0) | 571 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) |
| 190 | return -EINVAL; | 572 | { |
| 573 | return ds_get_end(task, pos, ds_pebs); | ||
| 574 | } | ||
| 191 | 575 | ||
| 192 | bts_size_in_records = | 576 | static int ds_access(struct task_struct *task, size_t index, |
| 193 | bts_size_in_bytes / ds_cfg.sizeof_bts; | 577 | const void **record, enum ds_qualifier qual) |
| 194 | bts_size_in_bytes = | 578 | { |
| 195 | bts_size_in_records * ds_cfg.sizeof_bts; | 579 | struct ds_context *context; |
| 580 | unsigned long base, idx; | ||
| 581 | int error; | ||
| 196 | 582 | ||
| 197 | if (bts_size_in_bytes <= 0) | 583 | if (!record) |
| 198 | return -EINVAL; | 584 | return -EINVAL; |
| 199 | 585 | ||
| 200 | bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); | 586 | context = ds_get_context(task); |
| 201 | 587 | error = ds_validate_access(context, qual); | |
| 202 | if (!bts) | 588 | if (error < 0) |
| 203 | return -ENOMEM; | 589 | goto out; |
| 204 | 590 | ||
| 205 | ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | 591 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 592 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
| 206 | 593 | ||
| 207 | if (!ds) { | 594 | error = -EINVAL; |
| 208 | kfree((void *)bts); | 595 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) |
| 209 | return -ENOMEM; | 596 | goto out; |
| 210 | } | ||
| 211 | |||
| 212 | set_bts_buffer_base(ds, bts); | ||
| 213 | set_bts_index(ds, bts); | ||
| 214 | set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); | ||
| 215 | set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); | ||
| 216 | 597 | ||
| 217 | *dsp = ds; | 598 | *record = (const void *)idx; |
| 218 | return 0; | 599 | error = ds_cfg.sizeof_rec[qual]; |
| 600 | out: | ||
| 601 | ds_put_context(context); | ||
| 602 | return error; | ||
| 219 | } | 603 | } |
| 220 | 604 | ||
| 221 | int ds_free(void **dsp) | 605 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) |
| 222 | { | 606 | { |
| 223 | if (*dsp) { | 607 | return ds_access(task, index, record, ds_bts); |
| 224 | kfree((void *)get_bts_buffer_base(*dsp)); | ||
| 225 | kfree(*dsp); | ||
| 226 | *dsp = NULL; | ||
| 227 | } | ||
| 228 | return 0; | ||
| 229 | } | 608 | } |
| 230 | 609 | ||
| 231 | int ds_get_bts_size(void *ds) | 610 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) |
| 232 | { | 611 | { |
| 233 | int size_in_bytes; | 612 | return ds_access(task, index, record, ds_pebs); |
| 234 | |||
| 235 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 236 | return -EOPNOTSUPP; | ||
| 237 | |||
| 238 | if (!ds) | ||
| 239 | return 0; | ||
| 240 | |||
| 241 | size_in_bytes = | ||
| 242 | get_bts_absolute_maximum(ds) - | ||
| 243 | get_bts_buffer_base(ds); | ||
| 244 | return size_in_bytes; | ||
| 245 | } | 613 | } |
| 246 | 614 | ||
| 247 | int ds_get_bts_end(void *ds) | 615 | static int ds_write(struct task_struct *task, const void *record, size_t size, |
| 616 | enum ds_qualifier qual, int force) | ||
| 248 | { | 617 | { |
| 249 | int size_in_bytes = ds_get_bts_size(ds); | 618 | struct ds_context *context; |
| 250 | 619 | int error; | |
| 251 | if (size_in_bytes <= 0) | ||
| 252 | return size_in_bytes; | ||
| 253 | 620 | ||
| 254 | return size_in_bytes / ds_cfg.sizeof_bts; | 621 | if (!record) |
| 255 | } | 622 | return -EINVAL; |
| 256 | 623 | ||
| 257 | int ds_get_bts_index(void *ds) | 624 | error = -EPERM; |
| 258 | { | 625 | context = ds_get_context(task); |
| 259 | int index_offset_in_bytes; | 626 | if (!context) |
| 627 | goto out; | ||
| 260 | 628 | ||
| 261 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 629 | if (!force) { |
| 262 | return -EOPNOTSUPP; | 630 | error = ds_validate_access(context, qual); |
| 631 | if (error < 0) | ||
| 632 | goto out; | ||
| 633 | } | ||
| 263 | 634 | ||
| 264 | index_offset_in_bytes = | 635 | error = 0; |
| 265 | get_bts_index(ds) - | 636 | while (size) { |
| 266 | get_bts_buffer_base(ds); | 637 | unsigned long base, index, end, write_end, int_th; |
| 638 | unsigned long write_size, adj_write_size; | ||
| 639 | |||
| 640 | /* | ||
| 641 | * write as much as possible without producing an | ||
| 642 | * overflow interrupt. | ||
| 643 | * | ||
| 644 | * interrupt_threshold must either be | ||
| 645 | * - bigger than absolute_maximum or | ||
| 646 | * - point to a record between buffer_base and absolute_maximum | ||
| 647 | * | ||
| 648 | * index points to a valid record. | ||
| 649 | */ | ||
| 650 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
| 651 | index = ds_get(context->ds, qual, ds_index); | ||
| 652 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
| 653 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
| 654 | |||
| 655 | write_end = min(end, int_th); | ||
| 656 | |||
| 657 | /* if we are already beyond the interrupt threshold, | ||
| 658 | * we fill the entire buffer */ | ||
| 659 | if (write_end <= index) | ||
| 660 | write_end = end; | ||
| 661 | |||
| 662 | if (write_end <= index) | ||
| 663 | goto out; | ||
| 664 | |||
| 665 | write_size = min((unsigned long) size, write_end - index); | ||
| 666 | memcpy((void *)index, record, write_size); | ||
| 667 | |||
| 668 | record = (const char *)record + write_size; | ||
| 669 | size -= write_size; | ||
| 670 | error += write_size; | ||
| 671 | |||
| 672 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
| 673 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
| 674 | |||
| 675 | /* zero out trailing bytes */ | ||
| 676 | memset((char *)index + write_size, 0, | ||
| 677 | adj_write_size - write_size); | ||
| 678 | index += adj_write_size; | ||
| 679 | |||
| 680 | if (index >= end) | ||
| 681 | index = base; | ||
| 682 | ds_set(context->ds, qual, ds_index, index); | ||
| 683 | |||
| 684 | if (index >= int_th) | ||
| 685 | ds_overflow(task, context, qual); | ||
| 686 | } | ||
| 267 | 687 | ||
| 268 | return index_offset_in_bytes / ds_cfg.sizeof_bts; | 688 | out: |
| 689 | ds_put_context(context); | ||
| 690 | return error; | ||
| 269 | } | 691 | } |
| 270 | 692 | ||
| 271 | int ds_set_overflow(void *ds, int method) | 693 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) |
| 272 | { | 694 | { |
| 273 | switch (method) { | 695 | return ds_write(task, record, size, ds_bts, /* force = */ 0); |
| 274 | case DS_O_SIGNAL: | ||
| 275 | return -EOPNOTSUPP; | ||
| 276 | case DS_O_WRAP: | ||
| 277 | return 0; | ||
| 278 | default: | ||
| 279 | return -EINVAL; | ||
| 280 | } | ||
| 281 | } | 696 | } |
| 282 | 697 | ||
| 283 | int ds_get_overflow(void *ds) | 698 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) |
| 284 | { | 699 | { |
| 285 | return DS_O_WRAP; | 700 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); |
| 286 | } | 701 | } |
| 287 | 702 | ||
| 288 | int ds_clear(void *ds) | 703 | int ds_unchecked_write_bts(struct task_struct *task, |
| 704 | const void *record, size_t size) | ||
| 289 | { | 705 | { |
| 290 | int bts_size = ds_get_bts_size(ds); | 706 | return ds_write(task, record, size, ds_bts, /* force = */ 1); |
| 291 | unsigned long bts_base; | ||
| 292 | |||
| 293 | if (bts_size <= 0) | ||
| 294 | return bts_size; | ||
| 295 | |||
| 296 | bts_base = get_bts_buffer_base(ds); | ||
| 297 | memset((void *)bts_base, 0, bts_size); | ||
| 298 | |||
| 299 | set_bts_index(ds, bts_base); | ||
| 300 | return 0; | ||
| 301 | } | 707 | } |
| 302 | 708 | ||
| 303 | int ds_read_bts(void *ds, int index, struct bts_struct *out) | 709 | int ds_unchecked_write_pebs(struct task_struct *task, |
| 710 | const void *record, size_t size) | ||
| 304 | { | 711 | { |
| 305 | void *bts; | 712 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); |
| 713 | } | ||
| 306 | 714 | ||
| 307 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | 715 | static int ds_reset_or_clear(struct task_struct *task, |
| 308 | return -EOPNOTSUPP; | 716 | enum ds_qualifier qual, int clear) |
| 717 | { | ||
| 718 | struct ds_context *context; | ||
| 719 | unsigned long base, end; | ||
| 720 | int error; | ||
| 309 | 721 | ||
| 310 | if (index < 0) | 722 | context = ds_get_context(task); |
| 311 | return -EINVAL; | 723 | error = ds_validate_access(context, qual); |
| 724 | if (error < 0) | ||
| 725 | goto out; | ||
| 312 | 726 | ||
| 313 | if (index >= ds_get_bts_size(ds)) | 727 | base = ds_get(context->ds, qual, ds_buffer_base); |
| 314 | return -EINVAL; | 728 | end = ds_get(context->ds, qual, ds_absolute_maximum); |
| 315 | 729 | ||
| 316 | bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); | 730 | if (clear) |
| 731 | memset((void *)base, 0, end - base); | ||
| 317 | 732 | ||
| 318 | memset(out, 0, sizeof(*out)); | 733 | ds_set(context->ds, qual, ds_index, base); |
| 319 | if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { | ||
| 320 | out->qualifier = get_info_type(bts); | ||
| 321 | out->variant.jiffies = get_info_data(bts); | ||
| 322 | } else { | ||
| 323 | out->qualifier = BTS_BRANCH; | ||
| 324 | out->variant.lbr.from_ip = get_from_ip(bts); | ||
| 325 | out->variant.lbr.to_ip = get_to_ip(bts); | ||
| 326 | } | ||
| 327 | 734 | ||
| 328 | return sizeof(*out);; | 735 | error = 0; |
| 736 | out: | ||
| 737 | ds_put_context(context); | ||
| 738 | return error; | ||
| 329 | } | 739 | } |
| 330 | 740 | ||
| 331 | int ds_write_bts(void *ds, const struct bts_struct *in) | 741 | int ds_reset_bts(struct task_struct *task) |
| 332 | { | 742 | { |
| 333 | unsigned long bts; | 743 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); |
| 334 | 744 | } | |
| 335 | if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) | ||
| 336 | return -EOPNOTSUPP; | ||
| 337 | |||
| 338 | if (ds_get_bts_size(ds) <= 0) | ||
| 339 | return -ENXIO; | ||
| 340 | 745 | ||
| 341 | bts = get_bts_index(ds); | 746 | int ds_reset_pebs(struct task_struct *task) |
| 747 | { | ||
| 748 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | ||
| 749 | } | ||
| 342 | 750 | ||
| 343 | memset((void *)bts, 0, ds_cfg.sizeof_bts); | 751 | int ds_clear_bts(struct task_struct *task) |
| 344 | switch (in->qualifier) { | 752 | { |
| 345 | case BTS_INVALID: | 753 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); |
| 346 | break; | 754 | } |
| 347 | 755 | ||
| 348 | case BTS_BRANCH: | 756 | int ds_clear_pebs(struct task_struct *task) |
| 349 | set_from_ip((void *)bts, in->variant.lbr.from_ip); | 757 | { |
| 350 | set_to_ip((void *)bts, in->variant.lbr.to_ip); | 758 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); |
| 351 | break; | 759 | } |
| 352 | 760 | ||
| 353 | case BTS_TASK_ARRIVES: | 761 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) |
| 354 | case BTS_TASK_DEPARTS: | 762 | { |
| 355 | set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); | 763 | struct ds_context *context; |
| 356 | set_info_type((void *)bts, in->qualifier); | 764 | int error; |
| 357 | set_info_data((void *)bts, in->variant.jiffies); | ||
| 358 | break; | ||
| 359 | 765 | ||
| 360 | default: | 766 | if (!value) |
| 361 | return -EINVAL; | 767 | return -EINVAL; |
| 362 | } | ||
| 363 | 768 | ||
| 364 | bts = bts + ds_cfg.sizeof_bts; | 769 | context = ds_get_context(task); |
| 365 | if (bts >= get_bts_absolute_maximum(ds)) | 770 | error = ds_validate_access(context, ds_pebs); |
| 366 | bts = get_bts_buffer_base(ds); | 771 | if (error < 0) |
| 367 | set_bts_index(ds, bts); | 772 | goto out; |
| 368 | 773 | ||
| 369 | return ds_cfg.sizeof_bts; | 774 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); |
| 775 | |||
| 776 | error = 0; | ||
| 777 | out: | ||
| 778 | ds_put_context(context); | ||
| 779 | return error; | ||
| 370 | } | 780 | } |
| 371 | 781 | ||
| 372 | unsigned long ds_debugctl_mask(void) | 782 | int ds_set_pebs_reset(struct task_struct *task, u64 value) |
| 373 | { | 783 | { |
| 374 | return ds_cfg.debugctl_mask; | 784 | struct ds_context *context; |
| 375 | } | 785 | int error; |
| 376 | 786 | ||
| 377 | #ifdef __i386__ | 787 | context = ds_get_context(task); |
| 378 | static const struct ds_configuration ds_cfg_netburst = { | 788 | error = ds_validate_access(context, ds_pebs); |
| 379 | .sizeof_ds = 9 * 4, | 789 | if (error < 0) |
| 380 | .bts_buffer_base = { 0, 4 }, | 790 | goto out; |
| 381 | .bts_index = { 4, 4 }, | ||
| 382 | .bts_absolute_maximum = { 8, 4 }, | ||
| 383 | .bts_interrupt_threshold = { 12, 4 }, | ||
| 384 | .sizeof_bts = 3 * 4, | ||
| 385 | .from_ip = { 0, 4 }, | ||
| 386 | .to_ip = { 4, 4 }, | ||
| 387 | .info_type = { 4, 1 }, | ||
| 388 | .info_data = { 8, 4 }, | ||
| 389 | .debugctl_mask = (1<<2)|(1<<3) | ||
| 390 | }; | ||
| 391 | 791 | ||
| 392 | static const struct ds_configuration ds_cfg_pentium_m = { | 792 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; |
| 393 | .sizeof_ds = 9 * 4, | 793 | |
| 394 | .bts_buffer_base = { 0, 4 }, | 794 | error = 0; |
| 395 | .bts_index = { 4, 4 }, | 795 | out: |
| 396 | .bts_absolute_maximum = { 8, 4 }, | 796 | ds_put_context(context); |
| 397 | .bts_interrupt_threshold = { 12, 4 }, | 797 | return error; |
| 398 | .sizeof_bts = 3 * 4, | 798 | } |
| 399 | .from_ip = { 0, 4 }, | 799 | |
| 400 | .to_ip = { 4, 4 }, | 800 | static const struct ds_configuration ds_cfg_var = { |
| 401 | .info_type = { 4, 1 }, | 801 | .sizeof_ds = sizeof(long) * 12, |
| 402 | .info_data = { 8, 4 }, | 802 | .sizeof_field = sizeof(long), |
| 403 | .debugctl_mask = (1<<6)|(1<<7) | 803 | .sizeof_rec[ds_bts] = sizeof(long) * 3, |
| 804 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | ||
| 404 | }; | 805 | }; |
| 405 | #endif /* _i386_ */ | 806 | static const struct ds_configuration ds_cfg_64 = { |
| 406 | 807 | .sizeof_ds = 8 * 12, | |
| 407 | static const struct ds_configuration ds_cfg_core2 = { | 808 | .sizeof_field = 8, |
| 408 | .sizeof_ds = 9 * 8, | 809 | .sizeof_rec[ds_bts] = 8 * 3, |
| 409 | .bts_buffer_base = { 0, 8 }, | 810 | .sizeof_rec[ds_pebs] = 8 * 10 |
| 410 | .bts_index = { 8, 8 }, | ||
| 411 | .bts_absolute_maximum = { 16, 8 }, | ||
| 412 | .bts_interrupt_threshold = { 24, 8 }, | ||
| 413 | .sizeof_bts = 3 * 8, | ||
| 414 | .from_ip = { 0, 8 }, | ||
| 415 | .to_ip = { 8, 8 }, | ||
| 416 | .info_type = { 8, 1 }, | ||
| 417 | .info_data = { 16, 8 }, | ||
| 418 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 419 | }; | 811 | }; |
| 420 | 812 | ||
| 421 | static inline void | 813 | static inline void |
| @@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 429 | switch (c->x86) { | 821 | switch (c->x86) { |
| 430 | case 0x6: | 822 | case 0x6: |
| 431 | switch (c->x86_model) { | 823 | switch (c->x86_model) { |
| 432 | #ifdef __i386__ | ||
| 433 | case 0xD: | 824 | case 0xD: |
| 434 | case 0xE: /* Pentium M */ | 825 | case 0xE: /* Pentium M */ |
| 435 | ds_configure(&ds_cfg_pentium_m); | 826 | ds_configure(&ds_cfg_var); |
| 436 | break; | 827 | break; |
| 437 | #endif /* _i386_ */ | ||
| 438 | case 0xF: /* Core2 */ | 828 | case 0xF: /* Core2 */ |
| 439 | ds_configure(&ds_cfg_core2); | 829 | case 0x1C: /* Atom */ |
| 830 | ds_configure(&ds_cfg_64); | ||
| 440 | break; | 831 | break; |
| 441 | default: | 832 | default: |
| 442 | /* sorry, don't know about them */ | 833 | /* sorry, don't know about them */ |
| @@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 445 | break; | 836 | break; |
| 446 | case 0xF: | 837 | case 0xF: |
| 447 | switch (c->x86_model) { | 838 | switch (c->x86_model) { |
| 448 | #ifdef __i386__ | ||
| 449 | case 0x0: | 839 | case 0x0: |
| 450 | case 0x1: | 840 | case 0x1: |
| 451 | case 0x2: /* Netburst */ | 841 | case 0x2: /* Netburst */ |
| 452 | ds_configure(&ds_cfg_netburst); | 842 | ds_configure(&ds_cfg_var); |
| 453 | break; | 843 | break; |
| 454 | #endif /* _i386_ */ | ||
| 455 | default: | 844 | default: |
| 456 | /* sorry, don't know about them */ | 845 | /* sorry, don't know about them */ |
| 457 | break; | 846 | break; |
| @@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
| 462 | break; | 851 | break; |
| 463 | } | 852 | } |
| 464 | } | 853 | } |
| 854 | |||
| 855 | void ds_free(struct ds_context *context) | ||
| 856 | { | ||
| 857 | /* This is called when the task owning the parameter context | ||
| 858 | * is dying. There should not be any user of that context left | ||
| 859 | * to disturb us, anymore. */ | ||
| 860 | unsigned long leftovers = context->count; | ||
| 861 | while (leftovers--) | ||
| 862 | ds_put_context(context); | ||
| 863 | } | ||
| 864 | #endif /* CONFIG_X86_DS */ | ||
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 31f40b24bf5d..491eb1a7e073 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
| @@ -277,6 +277,14 @@ void exit_thread(void) | |||
| 277 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 277 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
| 278 | put_cpu(); | 278 | put_cpu(); |
| 279 | } | 279 | } |
| 280 | #ifdef CONFIG_X86_DS | ||
| 281 | /* Free any DS contexts that have not been properly released. */ | ||
| 282 | if (unlikely(current->thread.ds_ctx)) { | ||
| 283 | /* we clear debugctl to make sure DS is not used. */ | ||
| 284 | update_debugctlmsr(0); | ||
| 285 | ds_free(current->thread.ds_ctx); | ||
| 286 | } | ||
| 287 | #endif /* CONFIG_X86_DS */ | ||
| 280 | } | 288 | } |
| 281 | 289 | ||
| 282 | void flush_thread(void) | 290 | void flush_thread(void) |
| @@ -438,6 +446,35 @@ int set_tsc_mode(unsigned int val) | |||
| 438 | return 0; | 446 | return 0; |
| 439 | } | 447 | } |
| 440 | 448 | ||
| 449 | #ifdef CONFIG_X86_DS | ||
| 450 | static int update_debugctl(struct thread_struct *prev, | ||
| 451 | struct thread_struct *next, unsigned long debugctl) | ||
| 452 | { | ||
| 453 | unsigned long ds_prev = 0; | ||
| 454 | unsigned long ds_next = 0; | ||
| 455 | |||
| 456 | if (prev->ds_ctx) | ||
| 457 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 458 | if (next->ds_ctx) | ||
| 459 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 460 | |||
| 461 | if (ds_next != ds_prev) { | ||
| 462 | /* we clear debugctl to make sure DS | ||
| 463 | * is not in use when we change it */ | ||
| 464 | debugctl = 0; | ||
| 465 | update_debugctlmsr(0); | ||
| 466 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
| 467 | } | ||
| 468 | return debugctl; | ||
| 469 | } | ||
| 470 | #else | ||
| 471 | static int update_debugctl(struct thread_struct *prev, | ||
| 472 | struct thread_struct *next, unsigned long debugctl) | ||
| 473 | { | ||
| 474 | return debugctl; | ||
| 475 | } | ||
| 476 | #endif /* CONFIG_X86_DS */ | ||
| 477 | |||
| 441 | static noinline void | 478 | static noinline void |
| 442 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 479 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
| 443 | struct tss_struct *tss) | 480 | struct tss_struct *tss) |
| @@ -448,14 +485,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 448 | prev = &prev_p->thread; | 485 | prev = &prev_p->thread; |
| 449 | next = &next_p->thread; | 486 | next = &next_p->thread; |
| 450 | 487 | ||
| 451 | debugctl = prev->debugctlmsr; | 488 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); |
| 452 | if (next->ds_area_msr != prev->ds_area_msr) { | ||
| 453 | /* we clear debugctl to make sure DS | ||
| 454 | * is not in use when we change it */ | ||
| 455 | debugctl = 0; | ||
| 456 | update_debugctlmsr(0); | ||
| 457 | wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); | ||
| 458 | } | ||
| 459 | 489 | ||
| 460 | if (next->debugctlmsr != debugctl) | 490 | if (next->debugctlmsr != debugctl) |
| 461 | update_debugctlmsr(next->debugctlmsr); | 491 | update_debugctlmsr(next->debugctlmsr); |
| @@ -479,13 +509,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
| 479 | hard_enable_TSC(); | 509 | hard_enable_TSC(); |
| 480 | } | 510 | } |
| 481 | 511 | ||
| 482 | #ifdef X86_BTS | 512 | #ifdef CONFIG_X86_PTRACE_BTS |
| 483 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 513 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 484 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 514 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 485 | 515 | ||
| 486 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 516 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 487 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 517 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 488 | #endif | 518 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 489 | 519 | ||
| 490 | 520 | ||
| 491 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 521 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e12e0e4dd256..4e168b250aff 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
| @@ -240,6 +240,14 @@ void exit_thread(void) | |||
| 240 | t->io_bitmap_max = 0; | 240 | t->io_bitmap_max = 0; |
| 241 | put_cpu(); | 241 | put_cpu(); |
| 242 | } | 242 | } |
| 243 | #ifdef CONFIG_X86_DS | ||
| 244 | /* Free any DS contexts that have not been properly released. */ | ||
| 245 | if (unlikely(t->ds_ctx)) { | ||
| 246 | /* we clear debugctl to make sure DS is not used. */ | ||
| 247 | update_debugctlmsr(0); | ||
| 248 | ds_free(t->ds_ctx); | ||
| 249 | } | ||
| 250 | #endif /* CONFIG_X86_DS */ | ||
| 243 | } | 251 | } |
| 244 | 252 | ||
| 245 | void flush_thread(void) | 253 | void flush_thread(void) |
| @@ -473,13 +481,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 473 | next = &next_p->thread; | 481 | next = &next_p->thread; |
| 474 | 482 | ||
| 475 | debugctl = prev->debugctlmsr; | 483 | debugctl = prev->debugctlmsr; |
| 476 | if (next->ds_area_msr != prev->ds_area_msr) { | 484 | |
| 477 | /* we clear debugctl to make sure DS | 485 | #ifdef CONFIG_X86_DS |
| 478 | * is not in use when we change it */ | 486 | { |
| 479 | debugctl = 0; | 487 | unsigned long ds_prev = 0, ds_next = 0; |
| 480 | update_debugctlmsr(0); | 488 | |
| 481 | wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); | 489 | if (prev->ds_ctx) |
| 490 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
| 491 | if (next->ds_ctx) | ||
| 492 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
| 493 | |||
| 494 | if (ds_next != ds_prev) { | ||
| 495 | /* | ||
| 496 | * We clear debugctl to make sure DS | ||
| 497 | * is not in use when we change it: | ||
| 498 | */ | ||
| 499 | debugctl = 0; | ||
| 500 | update_debugctlmsr(0); | ||
| 501 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
| 502 | } | ||
| 482 | } | 503 | } |
| 504 | #endif /* CONFIG_X86_DS */ | ||
| 483 | 505 | ||
| 484 | if (next->debugctlmsr != debugctl) | 506 | if (next->debugctlmsr != debugctl) |
| 485 | update_debugctlmsr(next->debugctlmsr); | 507 | update_debugctlmsr(next->debugctlmsr); |
| @@ -517,13 +539,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
| 517 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 539 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
| 518 | } | 540 | } |
| 519 | 541 | ||
| 520 | #ifdef X86_BTS | 542 | #ifdef CONFIG_X86_PTRACE_BTS |
| 521 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | 543 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) |
| 522 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | 544 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); |
| 523 | 545 | ||
| 524 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | 546 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) |
| 525 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | 547 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); |
| 526 | #endif | 548 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 527 | } | 549 | } |
| 528 | 550 | ||
| 529 | /* | 551 | /* |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccce85db..5df6093ac776 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
| @@ -554,45 +554,115 @@ static int ptrace_set_debugreg(struct task_struct *child, | |||
| 554 | return 0; | 554 | return 0; |
| 555 | } | 555 | } |
| 556 | 556 | ||
| 557 | #ifdef X86_BTS | 557 | #ifdef CONFIG_X86_PTRACE_BTS |
| 558 | /* | ||
| 559 | * The configuration for a particular BTS hardware implementation. | ||
| 560 | */ | ||
| 561 | struct bts_configuration { | ||
| 562 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
| 563 | unsigned char sizeof_bts; | ||
| 564 | /* the size of a field in the BTS record in bytes */ | ||
| 565 | unsigned char sizeof_field; | ||
| 566 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
| 567 | unsigned long debugctl_mask; | ||
| 568 | }; | ||
| 569 | static struct bts_configuration bts_cfg; | ||
| 570 | |||
| 571 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
| 572 | |||
| 573 | |||
| 574 | /* | ||
| 575 | * Branch Trace Store (BTS) uses the following format. Different | ||
| 576 | * architectures vary in the size of those fields. | ||
| 577 | * - source linear address | ||
| 578 | * - destination linear address | ||
| 579 | * - flags | ||
| 580 | * | ||
| 581 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
| 582 | * architectures use 32bit pointers in 32bit mode. | ||
| 583 | * | ||
| 584 | * We compute the base address for the first 8 fields based on: | ||
| 585 | * - the field size stored in the DS configuration | ||
| 586 | * - the relative field position | ||
| 587 | * | ||
| 588 | * In order to store additional information in the BTS buffer, we use | ||
| 589 | * a special source address to indicate that the record requires | ||
| 590 | * special interpretation. | ||
| 591 | * | ||
| 592 | * Netburst indicated via a bit in the flags field whether the branch | ||
| 593 | * was predicted; this is ignored. | ||
| 594 | */ | ||
| 595 | |||
| 596 | enum bts_field { | ||
| 597 | bts_from = 0, | ||
| 598 | bts_to, | ||
| 599 | bts_flags, | ||
| 600 | |||
| 601 | bts_escape = (unsigned long)-1, | ||
| 602 | bts_qual = bts_to, | ||
| 603 | bts_jiffies = bts_flags | ||
| 604 | }; | ||
| 558 | 605 | ||
| 559 | static int ptrace_bts_get_size(struct task_struct *child) | 606 | static inline unsigned long bts_get(const char *base, enum bts_field field) |
| 560 | { | 607 | { |
| 561 | if (!child->thread.ds_area_msr) | 608 | base += (bts_cfg.sizeof_field * field); |
| 562 | return -ENXIO; | 609 | return *(unsigned long *)base; |
| 610 | } | ||
| 611 | |||
| 612 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | ||
| 613 | { | ||
| 614 | base += (bts_cfg.sizeof_field * field);; | ||
| 615 | (*(unsigned long *)base) = val; | ||
| 616 | } | ||
| 563 | 617 | ||
| 564 | return ds_get_bts_index((void *)child->thread.ds_area_msr); | 618 | /* |
| 619 | * Translate a BTS record from the raw format into the bts_struct format | ||
| 620 | * | ||
| 621 | * out (out): bts_struct interpretation | ||
| 622 | * raw: raw BTS record | ||
| 623 | */ | ||
| 624 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
| 625 | { | ||
| 626 | memset(out, 0, sizeof(*out)); | ||
| 627 | if (bts_get(raw, bts_from) == bts_escape) { | ||
| 628 | out->qualifier = bts_get(raw, bts_qual); | ||
| 629 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
| 630 | } else { | ||
| 631 | out->qualifier = BTS_BRANCH; | ||
| 632 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
| 633 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
| 634 | } | ||
| 565 | } | 635 | } |
| 566 | 636 | ||
| 567 | static int ptrace_bts_read_record(struct task_struct *child, | 637 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
| 568 | long index, | ||
| 569 | struct bts_struct __user *out) | 638 | struct bts_struct __user *out) |
| 570 | { | 639 | { |
| 571 | struct bts_struct ret; | 640 | struct bts_struct ret; |
| 572 | int retval; | 641 | const void *bts_record; |
| 573 | int bts_end; | 642 | size_t bts_index, bts_end; |
| 574 | int bts_index; | 643 | int error; |
| 575 | 644 | ||
| 576 | if (!child->thread.ds_area_msr) | 645 | error = ds_get_bts_end(child, &bts_end); |
| 577 | return -ENXIO; | 646 | if (error < 0) |
| 647 | return error; | ||
| 578 | 648 | ||
| 579 | if (index < 0) | ||
| 580 | return -EINVAL; | ||
| 581 | |||
| 582 | bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); | ||
| 583 | if (bts_end <= index) | 649 | if (bts_end <= index) |
| 584 | return -EINVAL; | 650 | return -EINVAL; |
| 585 | 651 | ||
| 652 | error = ds_get_bts_index(child, &bts_index); | ||
| 653 | if (error < 0) | ||
| 654 | return error; | ||
| 655 | |||
| 586 | /* translate the ptrace bts index into the ds bts index */ | 656 | /* translate the ptrace bts index into the ds bts index */ |
| 587 | bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); | 657 | bts_index += bts_end - (index + 1); |
| 588 | bts_index -= (index + 1); | 658 | if (bts_end <= bts_index) |
| 589 | if (bts_index < 0) | 659 | bts_index -= bts_end; |
| 590 | bts_index += bts_end; | 660 | |
| 661 | error = ds_access_bts(child, bts_index, &bts_record); | ||
| 662 | if (error < 0) | ||
| 663 | return error; | ||
| 591 | 664 | ||
| 592 | retval = ds_read_bts((void *)child->thread.ds_area_msr, | 665 | ptrace_bts_translate_record(&ret, bts_record); |
| 593 | bts_index, &ret); | ||
| 594 | if (retval < 0) | ||
| 595 | return retval; | ||
| 596 | 666 | ||
| 597 | if (copy_to_user(out, &ret, sizeof(ret))) | 667 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 598 | return -EFAULT; | 668 | return -EFAULT; |
| @@ -600,101 +670,106 @@ static int ptrace_bts_read_record(struct task_struct *child, | |||
| 600 | return sizeof(ret); | 670 | return sizeof(ret); |
| 601 | } | 671 | } |
| 602 | 672 | ||
| 603 | static int ptrace_bts_clear(struct task_struct *child) | ||
| 604 | { | ||
| 605 | if (!child->thread.ds_area_msr) | ||
| 606 | return -ENXIO; | ||
| 607 | |||
| 608 | return ds_clear((void *)child->thread.ds_area_msr); | ||
| 609 | } | ||
| 610 | |||
| 611 | static int ptrace_bts_drain(struct task_struct *child, | 673 | static int ptrace_bts_drain(struct task_struct *child, |
| 612 | long size, | 674 | long size, |
| 613 | struct bts_struct __user *out) | 675 | struct bts_struct __user *out) |
| 614 | { | 676 | { |
| 615 | int end, i; | 677 | struct bts_struct ret; |
| 616 | void *ds = (void *)child->thread.ds_area_msr; | 678 | const unsigned char *raw; |
| 617 | 679 | size_t end, i; | |
| 618 | if (!ds) | 680 | int error; |
| 619 | return -ENXIO; | ||
| 620 | 681 | ||
| 621 | end = ds_get_bts_index(ds); | 682 | error = ds_get_bts_index(child, &end); |
| 622 | if (end <= 0) | 683 | if (error < 0) |
| 623 | return end; | 684 | return error; |
| 624 | 685 | ||
| 625 | if (size < (end * sizeof(struct bts_struct))) | 686 | if (size < (end * sizeof(struct bts_struct))) |
| 626 | return -EIO; | 687 | return -EIO; |
| 627 | 688 | ||
| 628 | for (i = 0; i < end; i++, out++) { | 689 | error = ds_access_bts(child, 0, (const void **)&raw); |
| 629 | struct bts_struct ret; | 690 | if (error < 0) |
| 630 | int retval; | 691 | return error; |
| 631 | 692 | ||
| 632 | retval = ds_read_bts(ds, i, &ret); | 693 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { |
| 633 | if (retval < 0) | 694 | ptrace_bts_translate_record(&ret, raw); |
| 634 | return retval; | ||
| 635 | 695 | ||
| 636 | if (copy_to_user(out, &ret, sizeof(ret))) | 696 | if (copy_to_user(out, &ret, sizeof(ret))) |
| 637 | return -EFAULT; | 697 | return -EFAULT; |
| 638 | } | 698 | } |
| 639 | 699 | ||
| 640 | ds_clear(ds); | 700 | error = ds_clear_bts(child); |
| 701 | if (error < 0) | ||
| 702 | return error; | ||
| 641 | 703 | ||
| 642 | return end; | 704 | return end; |
| 643 | } | 705 | } |
| 644 | 706 | ||
| 707 | static void ptrace_bts_ovfl(struct task_struct *child) | ||
| 708 | { | ||
| 709 | send_sig(child->thread.bts_ovfl_signal, child, 0); | ||
| 710 | } | ||
| 711 | |||
| 645 | static int ptrace_bts_config(struct task_struct *child, | 712 | static int ptrace_bts_config(struct task_struct *child, |
| 646 | long cfg_size, | 713 | long cfg_size, |
| 647 | const struct ptrace_bts_config __user *ucfg) | 714 | const struct ptrace_bts_config __user *ucfg) |
| 648 | { | 715 | { |
| 649 | struct ptrace_bts_config cfg; | 716 | struct ptrace_bts_config cfg; |
| 650 | int bts_size, ret = 0; | 717 | int error = 0; |
| 651 | void *ds; | 718 | |
| 719 | error = -EOPNOTSUPP; | ||
| 720 | if (!bts_cfg.sizeof_bts) | ||
| 721 | goto errout; | ||
| 652 | 722 | ||
| 723 | error = -EIO; | ||
| 653 | if (cfg_size < sizeof(cfg)) | 724 | if (cfg_size < sizeof(cfg)) |
| 654 | return -EIO; | 725 | goto errout; |
| 655 | 726 | ||
| 727 | error = -EFAULT; | ||
| 656 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 728 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
| 657 | return -EFAULT; | 729 | goto errout; |
| 658 | 730 | ||
| 659 | if ((int)cfg.size < 0) | 731 | error = -EINVAL; |
| 660 | return -EINVAL; | 732 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && |
| 733 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | ||
| 734 | goto errout; | ||
| 661 | 735 | ||
| 662 | bts_size = 0; | 736 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { |
| 663 | ds = (void *)child->thread.ds_area_msr; | 737 | ds_ovfl_callback_t ovfl = NULL; |
| 664 | if (ds) { | 738 | unsigned int sig = 0; |
| 665 | bts_size = ds_get_bts_size(ds); | 739 | |
| 666 | if (bts_size < 0) | 740 | /* we ignore the error in case we were not tracing child */ |
| 667 | return bts_size; | 741 | (void)ds_release_bts(child); |
| 668 | } | 742 | |
| 669 | cfg.size = PAGE_ALIGN(cfg.size); | 743 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
| 744 | if (!cfg.signal) | ||
| 745 | goto errout; | ||
| 746 | |||
| 747 | sig = cfg.signal; | ||
| 748 | ovfl = ptrace_bts_ovfl; | ||
| 749 | } | ||
| 670 | 750 | ||
| 671 | if (bts_size != cfg.size) { | 751 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); |
| 672 | ret = ptrace_bts_realloc(child, cfg.size, | 752 | if (error < 0) |
| 673 | cfg.flags & PTRACE_BTS_O_CUT_SIZE); | ||
| 674 | if (ret < 0) | ||
| 675 | goto errout; | 753 | goto errout; |
| 676 | 754 | ||
| 677 | ds = (void *)child->thread.ds_area_msr; | 755 | child->thread.bts_ovfl_signal = sig; |
| 678 | } | 756 | } |
| 679 | 757 | ||
| 680 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) | 758 | error = -EINVAL; |
| 681 | ret = ds_set_overflow(ds, DS_O_SIGNAL); | 759 | if (!child->thread.ds_ctx && cfg.flags) |
| 682 | else | ||
| 683 | ret = ds_set_overflow(ds, DS_O_WRAP); | ||
| 684 | if (ret < 0) | ||
| 685 | goto errout; | 760 | goto errout; |
| 686 | 761 | ||
| 687 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 762 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
| 688 | child->thread.debugctlmsr |= ds_debugctl_mask(); | 763 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; |
| 689 | else | 764 | else |
| 690 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 765 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 691 | 766 | ||
| 692 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 767 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
| 693 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 768 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 694 | else | 769 | else |
| 695 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 770 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 696 | 771 | ||
| 697 | ret = sizeof(cfg); | 772 | error = sizeof(cfg); |
| 698 | 773 | ||
| 699 | out: | 774 | out: |
| 700 | if (child->thread.debugctlmsr) | 775 | if (child->thread.debugctlmsr) |
| @@ -702,10 +777,10 @@ out: | |||
| 702 | else | 777 | else |
| 703 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 778 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 704 | 779 | ||
| 705 | return ret; | 780 | return error; |
| 706 | 781 | ||
| 707 | errout: | 782 | errout: |
| 708 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 783 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 709 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 784 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 710 | goto out; | 785 | goto out; |
| 711 | } | 786 | } |
| @@ -714,29 +789,40 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 714 | long cfg_size, | 789 | long cfg_size, |
| 715 | struct ptrace_bts_config __user *ucfg) | 790 | struct ptrace_bts_config __user *ucfg) |
| 716 | { | 791 | { |
| 717 | void *ds = (void *)child->thread.ds_area_msr; | ||
| 718 | struct ptrace_bts_config cfg; | 792 | struct ptrace_bts_config cfg; |
| 793 | size_t end; | ||
| 794 | const void *base, *max; | ||
| 795 | int error; | ||
| 719 | 796 | ||
| 720 | if (cfg_size < sizeof(cfg)) | 797 | if (cfg_size < sizeof(cfg)) |
| 721 | return -EIO; | 798 | return -EIO; |
| 722 | 799 | ||
| 723 | memset(&cfg, 0, sizeof(cfg)); | 800 | error = ds_get_bts_end(child, &end); |
| 801 | if (error < 0) | ||
| 802 | return error; | ||
| 724 | 803 | ||
| 725 | if (ds) { | 804 | error = ds_access_bts(child, /* index = */ 0, &base); |
| 726 | cfg.size = ds_get_bts_size(ds); | 805 | if (error < 0) |
| 806 | return error; | ||
| 727 | 807 | ||
| 728 | if (ds_get_overflow(ds) == DS_O_SIGNAL) | 808 | error = ds_access_bts(child, /* index = */ end, &max); |
| 729 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 809 | if (error < 0) |
| 810 | return error; | ||
| 730 | 811 | ||
| 731 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 812 | memset(&cfg, 0, sizeof(cfg)); |
| 732 | child->thread.debugctlmsr & ds_debugctl_mask()) | 813 | cfg.size = (max - base); |
| 733 | cfg.flags |= PTRACE_BTS_O_TRACE; | 814 | cfg.signal = child->thread.bts_ovfl_signal; |
| 815 | cfg.bts_size = sizeof(struct bts_struct); | ||
| 734 | 816 | ||
| 735 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 817 | if (cfg.signal) |
| 736 | cfg.flags |= PTRACE_BTS_O_SCHED; | 818 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
| 737 | } | ||
| 738 | 819 | ||
| 739 | cfg.bts_size = sizeof(struct bts_struct); | 820 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && |
| 821 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
| 822 | cfg.flags |= PTRACE_BTS_O_TRACE; | ||
| 823 | |||
| 824 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | ||
| 825 | cfg.flags |= PTRACE_BTS_O_SCHED; | ||
| 740 | 826 | ||
| 741 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 827 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
| 742 | return -EFAULT; | 828 | return -EFAULT; |
| @@ -744,89 +830,38 @@ static int ptrace_bts_status(struct task_struct *child, | |||
| 744 | return sizeof(cfg); | 830 | return sizeof(cfg); |
| 745 | } | 831 | } |
| 746 | 832 | ||
| 747 | |||
| 748 | static int ptrace_bts_write_record(struct task_struct *child, | 833 | static int ptrace_bts_write_record(struct task_struct *child, |
| 749 | const struct bts_struct *in) | 834 | const struct bts_struct *in) |
| 750 | { | 835 | { |
| 751 | int retval; | 836 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; |
| 752 | 837 | ||
| 753 | if (!child->thread.ds_area_msr) | 838 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); |
| 754 | return -ENXIO; | ||
| 755 | 839 | ||
| 756 | retval = ds_write_bts((void *)child->thread.ds_area_msr, in); | 840 | memset(bts_record, 0, bts_cfg.sizeof_bts); |
| 757 | if (retval) | 841 | switch (in->qualifier) { |
| 758 | return retval; | 842 | case BTS_INVALID: |
| 843 | break; | ||
| 759 | 844 | ||
| 760 | return sizeof(*in); | 845 | case BTS_BRANCH: |
| 761 | } | 846 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); |
| 847 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
| 848 | break; | ||
| 762 | 849 | ||
| 763 | static int ptrace_bts_realloc(struct task_struct *child, | 850 | case BTS_TASK_ARRIVES: |
| 764 | int size, int reduce_size) | 851 | case BTS_TASK_DEPARTS: |
| 765 | { | 852 | bts_set(bts_record, bts_from, bts_escape); |
| 766 | unsigned long rlim, vm; | 853 | bts_set(bts_record, bts_qual, in->qualifier); |
| 767 | int ret, old_size; | 854 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); |
| 855 | break; | ||
| 768 | 856 | ||
| 769 | if (size < 0) | 857 | default: |
| 770 | return -EINVAL; | 858 | return -EINVAL; |
| 771 | |||
| 772 | old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); | ||
| 773 | if (old_size < 0) | ||
| 774 | return old_size; | ||
| 775 | |||
| 776 | ret = ds_free((void **)&child->thread.ds_area_msr); | ||
| 777 | if (ret < 0) | ||
| 778 | goto out; | ||
| 779 | |||
| 780 | size >>= PAGE_SHIFT; | ||
| 781 | old_size >>= PAGE_SHIFT; | ||
| 782 | |||
| 783 | current->mm->total_vm -= old_size; | ||
| 784 | current->mm->locked_vm -= old_size; | ||
| 785 | |||
| 786 | if (size == 0) | ||
| 787 | goto out; | ||
| 788 | |||
| 789 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | ||
| 790 | vm = current->mm->total_vm + size; | ||
| 791 | if (rlim < vm) { | ||
| 792 | ret = -ENOMEM; | ||
| 793 | |||
| 794 | if (!reduce_size) | ||
| 795 | goto out; | ||
| 796 | |||
| 797 | size = rlim - current->mm->total_vm; | ||
| 798 | if (size <= 0) | ||
| 799 | goto out; | ||
| 800 | } | 859 | } |
| 801 | 860 | ||
| 802 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 861 | /* The writing task will be the switched-to task on a context |
| 803 | vm = current->mm->locked_vm + size; | 862 | * switch. It needs to write into the switched-from task's BTS |
| 804 | if (rlim < vm) { | 863 | * buffer. */ |
| 805 | ret = -ENOMEM; | 864 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); |
| 806 | |||
| 807 | if (!reduce_size) | ||
| 808 | goto out; | ||
| 809 | |||
| 810 | size = rlim - current->mm->locked_vm; | ||
| 811 | if (size <= 0) | ||
| 812 | goto out; | ||
| 813 | } | ||
| 814 | |||
| 815 | ret = ds_allocate((void **)&child->thread.ds_area_msr, | ||
| 816 | size << PAGE_SHIFT); | ||
| 817 | if (ret < 0) | ||
| 818 | goto out; | ||
| 819 | |||
| 820 | current->mm->total_vm += size; | ||
| 821 | current->mm->locked_vm += size; | ||
| 822 | |||
| 823 | out: | ||
| 824 | if (child->thread.ds_area_msr) | ||
| 825 | set_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 826 | else | ||
| 827 | clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); | ||
| 828 | |||
| 829 | return ret; | ||
| 830 | } | 865 | } |
| 831 | 866 | ||
| 832 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 867 | void ptrace_bts_take_timestamp(struct task_struct *tsk, |
| @@ -839,7 +874,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, | |||
| 839 | 874 | ||
| 840 | ptrace_bts_write_record(tsk, &rec); | 875 | ptrace_bts_write_record(tsk, &rec); |
| 841 | } | 876 | } |
| 842 | #endif /* X86_BTS */ | 877 | |
| 878 | static const struct bts_configuration bts_cfg_netburst = { | ||
| 879 | .sizeof_bts = sizeof(long) * 3, | ||
| 880 | .sizeof_field = sizeof(long), | ||
| 881 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | ||
| 882 | }; | ||
| 883 | |||
| 884 | static const struct bts_configuration bts_cfg_pentium_m = { | ||
| 885 | .sizeof_bts = sizeof(long) * 3, | ||
| 886 | .sizeof_field = sizeof(long), | ||
| 887 | .debugctl_mask = (1<<6)|(1<<7) | ||
| 888 | }; | ||
| 889 | |||
| 890 | static const struct bts_configuration bts_cfg_core2 = { | ||
| 891 | .sizeof_bts = 8 * 3, | ||
| 892 | .sizeof_field = 8, | ||
| 893 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | ||
| 894 | }; | ||
| 895 | |||
| 896 | static inline void bts_configure(const struct bts_configuration *cfg) | ||
| 897 | { | ||
| 898 | bts_cfg = *cfg; | ||
| 899 | } | ||
| 900 | |||
| 901 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | ||
| 902 | { | ||
| 903 | switch (c->x86) { | ||
| 904 | case 0x6: | ||
| 905 | switch (c->x86_model) { | ||
| 906 | case 0xD: | ||
| 907 | case 0xE: /* Pentium M */ | ||
| 908 | bts_configure(&bts_cfg_pentium_m); | ||
| 909 | break; | ||
| 910 | case 0xF: /* Core2 */ | ||
| 911 | case 0x1C: /* Atom */ | ||
| 912 | bts_configure(&bts_cfg_core2); | ||
| 913 | break; | ||
| 914 | default: | ||
| 915 | /* sorry, don't know about them */ | ||
| 916 | break; | ||
| 917 | } | ||
| 918 | break; | ||
| 919 | case 0xF: | ||
| 920 | switch (c->x86_model) { | ||
| 921 | case 0x0: | ||
| 922 | case 0x1: | ||
| 923 | case 0x2: /* Netburst */ | ||
| 924 | bts_configure(&bts_cfg_netburst); | ||
| 925 | break; | ||
| 926 | default: | ||
| 927 | /* sorry, don't know about them */ | ||
| 928 | break; | ||
| 929 | } | ||
| 930 | break; | ||
| 931 | default: | ||
| 932 | /* sorry, don't know about them */ | ||
| 933 | break; | ||
| 934 | } | ||
| 935 | } | ||
| 936 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 843 | 937 | ||
| 844 | /* | 938 | /* |
| 845 | * Called by kernel/ptrace.c when detaching.. | 939 | * Called by kernel/ptrace.c when detaching.. |
| @@ -852,15 +946,15 @@ void ptrace_disable(struct task_struct *child) | |||
| 852 | #ifdef TIF_SYSCALL_EMU | 946 | #ifdef TIF_SYSCALL_EMU |
| 853 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 947 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
| 854 | #endif | 948 | #endif |
| 855 | if (child->thread.ds_area_msr) { | 949 | #ifdef CONFIG_X86_PTRACE_BTS |
| 856 | #ifdef X86_BTS | 950 | (void)ds_release_bts(child); |
| 857 | ptrace_bts_realloc(child, 0, 0); | 951 | |
| 858 | #endif | 952 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; |
| 859 | child->thread.debugctlmsr &= ~ds_debugctl_mask(); | 953 | if (!child->thread.debugctlmsr) |
| 860 | if (!child->thread.debugctlmsr) | 954 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); |
| 861 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | 955 | |
| 862 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 956 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); |
| 863 | } | 957 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 864 | } | 958 | } |
| 865 | 959 | ||
| 866 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 960 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
| @@ -980,7 +1074,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 980 | /* | 1074 | /* |
| 981 | * These bits need more cooking - not enabled yet: | 1075 | * These bits need more cooking - not enabled yet: |
| 982 | */ | 1076 | */ |
| 983 | #ifdef X86_BTS | 1077 | #ifdef CONFIG_X86_PTRACE_BTS |
| 984 | case PTRACE_BTS_CONFIG: | 1078 | case PTRACE_BTS_CONFIG: |
| 985 | ret = ptrace_bts_config | 1079 | ret = ptrace_bts_config |
| 986 | (child, data, (struct ptrace_bts_config __user *)addr); | 1080 | (child, data, (struct ptrace_bts_config __user *)addr); |
| @@ -992,7 +1086,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 992 | break; | 1086 | break; |
| 993 | 1087 | ||
| 994 | case PTRACE_BTS_SIZE: | 1088 | case PTRACE_BTS_SIZE: |
| 995 | ret = ptrace_bts_get_size(child); | 1089 | ret = ds_get_bts_index(child, /* pos = */ NULL); |
| 996 | break; | 1090 | break; |
| 997 | 1091 | ||
| 998 | case PTRACE_BTS_GET: | 1092 | case PTRACE_BTS_GET: |
| @@ -1001,14 +1095,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
| 1001 | break; | 1095 | break; |
| 1002 | 1096 | ||
| 1003 | case PTRACE_BTS_CLEAR: | 1097 | case PTRACE_BTS_CLEAR: |
| 1004 | ret = ptrace_bts_clear(child); | 1098 | ret = ds_clear_bts(child); |
| 1005 | break; | 1099 | break; |
| 1006 | 1100 | ||
| 1007 | case PTRACE_BTS_DRAIN: | 1101 | case PTRACE_BTS_DRAIN: |
| 1008 | ret = ptrace_bts_drain | 1102 | ret = ptrace_bts_drain |
| 1009 | (child, data, (struct bts_struct __user *) addr); | 1103 | (child, data, (struct bts_struct __user *) addr); |
| 1010 | break; | 1104 | break; |
| 1011 | #endif | 1105 | #endif /* CONFIG_X86_PTRACE_BTS */ |
| 1012 | 1106 | ||
| 1013 | default: | 1107 | default: |
| 1014 | ret = ptrace_request(child, request, addr, data); | 1108 | ret = ptrace_request(child, request, addr, data); |
diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 6b27c686fa10..c3c953a45b21 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h | |||
| @@ -2,71 +2,237 @@ | |||
| 2 | * Debug Store (DS) support | 2 | * Debug Store (DS) support |
| 3 | * | 3 | * |
| 4 | * This provides a low-level interface to the hardware's Debug Store | 4 | * This provides a low-level interface to the hardware's Debug Store |
| 5 | * feature that is used for last branch recording (LBR) and | 5 | * feature that is used for branch trace store (BTS) and |
| 6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
| 7 | * | 7 | * |
| 8 | * Different architectures use a different DS layout/pointer size. | 8 | * It manages: |
| 9 | * The below functions therefore work on a void*. | 9 | * - per-thread and per-cpu allocation of BTS and PEBS |
| 10 | * - buffer memory allocation (optional) | ||
| 11 | * - buffer overflow handling | ||
| 12 | * - buffer access | ||
| 10 | * | 13 | * |
| 14 | * It assumes: | ||
| 15 | * - get_task_struct on all parameter tasks | ||
| 16 | * - current is allowed to trace parameter tasks | ||
| 11 | * | 17 | * |
| 12 | * Since there is no user for PEBS, yet, only LBR (or branch | ||
| 13 | * trace store, BTS) is supported. | ||
| 14 | * | 18 | * |
| 15 | * | 19 | * Copyright (C) 2007-2008 Intel Corporation. |
| 16 | * Copyright (C) 2007 Intel Corporation. | 20 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 |
| 17 | * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 | ||
| 18 | */ | 21 | */ |
| 19 | 22 | ||
| 20 | #ifndef ASM_X86__DS_H | 23 | #ifndef ASM_X86__DS_H |
| 21 | #define ASM_X86__DS_H | 24 | #define ASM_X86__DS_H |
| 22 | 25 | ||
| 26 | #ifdef CONFIG_X86_DS | ||
| 27 | |||
| 23 | #include <linux/types.h> | 28 | #include <linux/types.h> |
| 24 | #include <linux/init.h> | 29 | #include <linux/init.h> |
| 25 | 30 | ||
| 26 | struct cpuinfo_x86; | ||
| 27 | 31 | ||
| 32 | struct task_struct; | ||
| 28 | 33 | ||
| 29 | /* a branch trace record entry | 34 | /* |
| 35 | * Request BTS or PEBS | ||
| 36 | * | ||
| 37 | * Due to alignment constraints, the actual buffer may be slightly | ||
| 38 | * smaller than the requested or provided buffer. | ||
| 30 | * | 39 | * |
| 31 | * In order to unify the interface between various processor versions, | 40 | * Returns 0 on success; -Eerrno otherwise |
| 32 | * we use the below data structure for all processors. | 41 | * |
| 42 | * task: the task to request recording for; | ||
| 43 | * NULL for per-cpu recording on the current cpu | ||
| 44 | * base: the base pointer for the (non-pageable) buffer; | ||
| 45 | * NULL if buffer allocation requested | ||
| 46 | * size: the size of the requested or provided buffer | ||
| 47 | * ovfl: pointer to a function to be called on buffer overflow; | ||
| 48 | * NULL if cyclic buffer requested | ||
| 33 | */ | 49 | */ |
| 34 | enum bts_qualifier { | 50 | typedef void (*ds_ovfl_callback_t)(struct task_struct *); |
| 35 | BTS_INVALID = 0, | 51 | extern int ds_request_bts(struct task_struct *task, void *base, size_t size, |
| 36 | BTS_BRANCH, | 52 | ds_ovfl_callback_t ovfl); |
| 37 | BTS_TASK_ARRIVES, | 53 | extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, |
| 38 | BTS_TASK_DEPARTS | 54 | ds_ovfl_callback_t ovfl); |
| 39 | }; | 55 | |
| 56 | /* | ||
| 57 | * Release BTS or PEBS resources | ||
| 58 | * | ||
| 59 | * Frees buffers allocated on ds_request. | ||
| 60 | * | ||
| 61 | * Returns 0 on success; -Eerrno otherwise | ||
| 62 | * | ||
| 63 | * task: the task to release resources for; | ||
| 64 | * NULL to release resources for the current cpu | ||
| 65 | */ | ||
| 66 | extern int ds_release_bts(struct task_struct *task); | ||
| 67 | extern int ds_release_pebs(struct task_struct *task); | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Return the (array) index of the write pointer. | ||
| 71 | * (assuming an array of BTS/PEBS records) | ||
| 72 | * | ||
| 73 | * Returns -Eerrno on error | ||
| 74 | * | ||
| 75 | * task: the task to access; | ||
| 76 | * NULL to access the current cpu | ||
| 77 | * pos (out): if not NULL, will hold the result | ||
| 78 | */ | ||
| 79 | extern int ds_get_bts_index(struct task_struct *task, size_t *pos); | ||
| 80 | extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Return the (array) index one record beyond the end of the array. | ||
| 84 | * (assuming an array of BTS/PEBS records) | ||
| 85 | * | ||
| 86 | * Returns -Eerrno on error | ||
| 87 | * | ||
| 88 | * task: the task to access; | ||
| 89 | * NULL to access the current cpu | ||
| 90 | * pos (out): if not NULL, will hold the result | ||
| 91 | */ | ||
| 92 | extern int ds_get_bts_end(struct task_struct *task, size_t *pos); | ||
| 93 | extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); | ||
| 94 | |||
| 95 | /* | ||
| 96 | * Provide a pointer to the BTS/PEBS record at parameter index. | ||
| 97 | * (assuming an array of BTS/PEBS records) | ||
| 98 | * | ||
| 99 | * The pointer points directly into the buffer. The user is | ||
| 100 | * responsible for copying the record. | ||
| 101 | * | ||
| 102 | * Returns the size of a single record on success; -Eerrno on error | ||
| 103 | * | ||
| 104 | * task: the task to access; | ||
| 105 | * NULL to access the current cpu | ||
| 106 | * index: the index of the requested record | ||
| 107 | * record (out): pointer to the requested record | ||
| 108 | */ | ||
| 109 | extern int ds_access_bts(struct task_struct *task, | ||
| 110 | size_t index, const void **record); | ||
| 111 | extern int ds_access_pebs(struct task_struct *task, | ||
| 112 | size_t index, const void **record); | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Write one or more BTS/PEBS records at the write pointer index and | ||
| 116 | * advance the write pointer. | ||
| 117 | * | ||
| 118 | * If size is not a multiple of the record size, trailing bytes are | ||
| 119 | * zeroed out. | ||
| 120 | * | ||
| 121 | * May result in one or more overflow notifications. | ||
| 122 | * | ||
| 123 | * If called during overflow handling, that is, with index >= | ||
| 124 | * interrupt threshold, the write will wrap around. | ||
| 125 | * | ||
| 126 | * An overflow notification is given if and when the interrupt | ||
| 127 | * threshold is reached during or after the write. | ||
| 128 | * | ||
| 129 | * Returns the number of bytes written or -Eerrno. | ||
| 130 | * | ||
| 131 | * task: the task to access; | ||
| 132 | * NULL to access the current cpu | ||
| 133 | * buffer: the buffer to write | ||
| 134 | * size: the size of the buffer | ||
| 135 | */ | ||
| 136 | extern int ds_write_bts(struct task_struct *task, | ||
| 137 | const void *buffer, size_t size); | ||
| 138 | extern int ds_write_pebs(struct task_struct *task, | ||
| 139 | const void *buffer, size_t size); | ||
| 140 | |||
| 141 | /* | ||
| 142 | * Same as ds_write_bts/pebs, but omit ownership checks. | ||
| 143 | * | ||
| 144 | * This is needed to have some other task than the owner of the | ||
| 145 | * BTS/PEBS buffer or the parameter task itself write into the | ||
| 146 | * respective buffer. | ||
| 147 | */ | ||
| 148 | extern int ds_unchecked_write_bts(struct task_struct *task, | ||
| 149 | const void *buffer, size_t size); | ||
| 150 | extern int ds_unchecked_write_pebs(struct task_struct *task, | ||
| 151 | const void *buffer, size_t size); | ||
| 152 | |||
| 153 | /* | ||
| 154 | * Reset the write pointer of the BTS/PEBS buffer. | ||
| 155 | * | ||
| 156 | * Returns 0 on success; -Eerrno on error | ||
| 157 | * | ||
| 158 | * task: the task to access; | ||
| 159 | * NULL to access the current cpu | ||
| 160 | */ | ||
| 161 | extern int ds_reset_bts(struct task_struct *task); | ||
| 162 | extern int ds_reset_pebs(struct task_struct *task); | ||
| 163 | |||
| 164 | /* | ||
| 165 | * Clear the BTS/PEBS buffer and reset the write pointer. | ||
| 166 | * The entire buffer will be zeroed out. | ||
| 167 | * | ||
| 168 | * Returns 0 on success; -Eerrno on error | ||
| 169 | * | ||
| 170 | * task: the task to access; | ||
| 171 | * NULL to access the current cpu | ||
| 172 | */ | ||
| 173 | extern int ds_clear_bts(struct task_struct *task); | ||
| 174 | extern int ds_clear_pebs(struct task_struct *task); | ||
| 175 | |||
| 176 | /* | ||
| 177 | * Provide the PEBS counter reset value. | ||
| 178 | * | ||
| 179 | * Returns 0 on success; -Eerrno on error | ||
| 180 | * | ||
| 181 | * task: the task to access; | ||
| 182 | * NULL to access the current cpu | ||
| 183 | * value (out): the counter reset value | ||
| 184 | */ | ||
| 185 | extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); | ||
| 186 | |||
| 187 | /* | ||
| 188 | * Set the PEBS counter reset value. | ||
| 189 | * | ||
| 190 | * Returns 0 on success; -Eerrno on error | ||
| 191 | * | ||
| 192 | * task: the task to access; | ||
| 193 | * NULL to access the current cpu | ||
| 194 | * value: the new counter reset value | ||
| 195 | */ | ||
| 196 | extern int ds_set_pebs_reset(struct task_struct *task, u64 value); | ||
| 197 | |||
| 198 | /* | ||
| 199 | * Initialization | ||
| 200 | */ | ||
| 201 | struct cpuinfo_x86; | ||
| 202 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | ||
| 203 | |||
| 204 | |||
| 40 | 205 | ||
| 41 | struct bts_struct { | 206 | /* |
| 42 | u64 qualifier; | 207 | * The DS context - part of struct thread_struct. |
| 43 | union { | 208 | */ |
| 44 | /* BTS_BRANCH */ | 209 | struct ds_context { |
| 45 | struct { | 210 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ |
| 46 | u64 from_ip; | 211 | unsigned char *ds; |
| 47 | u64 to_ip; | 212 | /* the owner of the BTS and PEBS configuration, respectively */ |
| 48 | } lbr; | 213 | struct task_struct *owner[2]; |
| 49 | /* BTS_TASK_ARRIVES or | 214 | /* buffer overflow notification function for BTS and PEBS */ |
| 50 | BTS_TASK_DEPARTS */ | 215 | ds_ovfl_callback_t callback[2]; |
| 51 | u64 jiffies; | 216 | /* the original buffer address */ |
| 52 | } variant; | 217 | void *buffer[2]; |
| 218 | /* the number of allocated pages for on-request allocated buffers */ | ||
| 219 | unsigned int pages[2]; | ||
| 220 | /* use count */ | ||
| 221 | unsigned long count; | ||
| 222 | /* a pointer to the context location inside the thread_struct | ||
| 223 | * or the per_cpu context array */ | ||
| 224 | struct ds_context **this; | ||
| 225 | /* a pointer to the task owning this context, or NULL, if the | ||
| 226 | * context is owned by a cpu */ | ||
| 227 | struct task_struct *task; | ||
| 53 | }; | 228 | }; |
| 54 | 229 | ||
| 55 | /* Overflow handling mechanisms */ | 230 | /* called by exit_thread() to free leftover contexts */ |
| 56 | #define DS_O_SIGNAL 1 /* send overflow signal */ | 231 | extern void ds_free(struct ds_context *context); |
| 57 | #define DS_O_WRAP 2 /* wrap around */ | 232 | |
| 58 | 233 | #else /* CONFIG_X86_DS */ | |
| 59 | extern int ds_allocate(void **, size_t); | 234 | |
| 60 | extern int ds_free(void **); | 235 | #define ds_init_intel(config) do {} while (0) |
| 61 | extern int ds_get_bts_size(void *); | ||
| 62 | extern int ds_get_bts_end(void *); | ||
| 63 | extern int ds_get_bts_index(void *); | ||
| 64 | extern int ds_set_overflow(void *, int); | ||
| 65 | extern int ds_get_overflow(void *); | ||
| 66 | extern int ds_clear(void *); | ||
| 67 | extern int ds_read_bts(void *, int, struct bts_struct *); | ||
| 68 | extern int ds_write_bts(void *, const struct bts_struct *); | ||
| 69 | extern unsigned long ds_debugctl_mask(void); | ||
| 70 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); | ||
| 71 | 236 | ||
| 237 | #endif /* CONFIG_X86_DS */ | ||
| 72 | #endif /* ASM_X86__DS_H */ | 238 | #endif /* ASM_X86__DS_H */ |
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 24cc5261af0c..51297d7f99c9 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h | |||
| @@ -20,6 +20,7 @@ struct mm_struct; | |||
| 20 | #include <asm/msr.h> | 20 | #include <asm/msr.h> |
| 21 | #include <asm/desc_defs.h> | 21 | #include <asm/desc_defs.h> |
| 22 | #include <asm/nops.h> | 22 | #include <asm/nops.h> |
| 23 | #include <asm/ds.h> | ||
| 23 | 24 | ||
| 24 | #include <linux/personality.h> | 25 | #include <linux/personality.h> |
| 25 | #include <linux/cpumask.h> | 26 | #include <linux/cpumask.h> |
| @@ -411,9 +412,14 @@ struct thread_struct { | |||
| 411 | unsigned io_bitmap_max; | 412 | unsigned io_bitmap_max; |
| 412 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | 413 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ |
| 413 | unsigned long debugctlmsr; | 414 | unsigned long debugctlmsr; |
| 414 | /* Debug Store - if not 0 points to a DS Save Area configuration; | 415 | #ifdef CONFIG_X86_DS |
| 415 | * goes into MSR_IA32_DS_AREA */ | 416 | /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ |
| 416 | unsigned long ds_area_msr; | 417 | struct ds_context *ds_ctx; |
| 418 | #endif /* CONFIG_X86_DS */ | ||
| 419 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 420 | /* the signal to send on a bts buffer overflow */ | ||
| 421 | unsigned int bts_ovfl_signal; | ||
| 422 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 417 | }; | 423 | }; |
| 418 | 424 | ||
| 419 | static inline unsigned long native_get_debugreg(int regno) | 425 | static inline unsigned long native_get_debugreg(int regno) |
diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index d0cf3344a586..4298b8882a78 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h | |||
| @@ -80,8 +80,9 @@ | |||
| 80 | 80 | ||
| 81 | #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ | 81 | #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ |
| 82 | 82 | ||
| 83 | #ifndef __ASSEMBLY__ | 83 | #ifdef CONFIG_X86_PTRACE_BTS |
| 84 | 84 | ||
| 85 | #ifndef __ASSEMBLY__ | ||
| 85 | #include <asm/types.h> | 86 | #include <asm/types.h> |
| 86 | 87 | ||
| 87 | /* configuration/status structure used in PTRACE_BTS_CONFIG and | 88 | /* configuration/status structure used in PTRACE_BTS_CONFIG and |
| @@ -97,20 +98,20 @@ struct ptrace_bts_config { | |||
| 97 | /* actual size of bts_struct in bytes */ | 98 | /* actual size of bts_struct in bytes */ |
| 98 | __u32 bts_size; | 99 | __u32 bts_size; |
| 99 | }; | 100 | }; |
| 100 | #endif | 101 | #endif /* __ASSEMBLY__ */ |
| 101 | 102 | ||
| 102 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ | 103 | #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ |
| 103 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ | 104 | #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ |
| 104 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow | 105 | #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow |
| 105 | instead of wrapping around */ | 106 | instead of wrapping around */ |
| 106 | #define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available | 107 | #define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ |
| 107 | instead of failing */ | ||
| 108 | 108 | ||
| 109 | #define PTRACE_BTS_CONFIG 40 | 109 | #define PTRACE_BTS_CONFIG 40 |
| 110 | /* Configure branch trace recording. | 110 | /* Configure branch trace recording. |
| 111 | ADDR points to a struct ptrace_bts_config. | 111 | ADDR points to a struct ptrace_bts_config. |
| 112 | DATA gives the size of that buffer. | 112 | DATA gives the size of that buffer. |
| 113 | A new buffer is allocated, iff the size changes. | 113 | A new buffer is allocated, if requested in the flags. |
| 114 | An overflow signal may only be requested for new buffers. | ||
| 114 | Returns the number of bytes read. | 115 | Returns the number of bytes read. |
| 115 | */ | 116 | */ |
| 116 | #define PTRACE_BTS_STATUS 41 | 117 | #define PTRACE_BTS_STATUS 41 |
| @@ -119,7 +120,7 @@ struct ptrace_bts_config { | |||
| 119 | Returns the number of bytes written. | 120 | Returns the number of bytes written. |
| 120 | */ | 121 | */ |
| 121 | #define PTRACE_BTS_SIZE 42 | 122 | #define PTRACE_BTS_SIZE 42 |
| 122 | /* Return the number of available BTS records. | 123 | /* Return the number of available BTS records for draining. |
| 123 | DATA and ADDR are ignored. | 124 | DATA and ADDR are ignored. |
| 124 | */ | 125 | */ |
| 125 | #define PTRACE_BTS_GET 43 | 126 | #define PTRACE_BTS_GET 43 |
| @@ -139,5 +140,6 @@ struct ptrace_bts_config { | |||
| 139 | BTS records are read from oldest to newest. | 140 | BTS records are read from oldest to newest. |
| 140 | Returns number of BTS records drained. | 141 | Returns number of BTS records drained. |
| 141 | */ | 142 | */ |
| 143 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 142 | 144 | ||
| 143 | #endif /* ASM_X86__PTRACE_ABI_H */ | 145 | #endif /* ASM_X86__PTRACE_ABI_H */ |
diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index d464f252edc3..45c75238cd08 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h | |||
| @@ -127,14 +127,48 @@ struct pt_regs { | |||
| 127 | #endif /* __KERNEL__ */ | 127 | #endif /* __KERNEL__ */ |
| 128 | #endif /* !__i386__ */ | 128 | #endif /* !__i386__ */ |
| 129 | 129 | ||
| 130 | |||
| 131 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 132 | /* a branch trace record entry | ||
| 133 | * | ||
| 134 | * In order to unify the interface between various processor versions, | ||
| 135 | * we use the below data structure for all processors. | ||
| 136 | */ | ||
| 137 | enum bts_qualifier { | ||
| 138 | BTS_INVALID = 0, | ||
| 139 | BTS_BRANCH, | ||
| 140 | BTS_TASK_ARRIVES, | ||
| 141 | BTS_TASK_DEPARTS | ||
| 142 | }; | ||
| 143 | |||
| 144 | struct bts_struct { | ||
| 145 | __u64 qualifier; | ||
| 146 | union { | ||
| 147 | /* BTS_BRANCH */ | ||
| 148 | struct { | ||
| 149 | __u64 from_ip; | ||
| 150 | __u64 to_ip; | ||
| 151 | } lbr; | ||
| 152 | /* BTS_TASK_ARRIVES or | ||
| 153 | BTS_TASK_DEPARTS */ | ||
| 154 | __u64 jiffies; | ||
| 155 | } variant; | ||
| 156 | }; | ||
| 157 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 158 | |||
| 130 | #ifdef __KERNEL__ | 159 | #ifdef __KERNEL__ |
| 131 | 160 | ||
| 132 | /* the DS BTS struct is used for ptrace as well */ | 161 | #include <linux/init.h> |
| 133 | #include <asm/ds.h> | ||
| 134 | 162 | ||
| 163 | struct cpuinfo_x86; | ||
| 135 | struct task_struct; | 164 | struct task_struct; |
| 136 | 165 | ||
| 166 | #ifdef CONFIG_X86_PTRACE_BTS | ||
| 167 | extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); | ||
| 137 | extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); | 168 | extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); |
| 169 | #else | ||
| 170 | #define ptrace_bts_init_intel(config) do {} while (0) | ||
| 171 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
| 138 | 172 | ||
| 139 | extern unsigned long profile_pc(struct pt_regs *regs); | 173 | extern unsigned long profile_pc(struct pt_regs *regs); |
| 140 | 174 | ||
