diff options
Diffstat (limited to 'kernel/profile.c')
| -rw-r--r-- | kernel/profile.c | 39 |
1 files changed, 30 insertions, 9 deletions
diff --git a/kernel/profile.c b/kernel/profile.c index 15b012df4ff1..04fd84e8cdbe 100644 --- a/kernel/profile.c +++ b/kernel/profile.c | |||
| @@ -40,7 +40,7 @@ int (*timer_hook)(struct pt_regs *) __read_mostly; | |||
| 40 | 40 | ||
| 41 | static atomic_t *prof_buffer; | 41 | static atomic_t *prof_buffer; |
| 42 | static unsigned long prof_len, prof_shift; | 42 | static unsigned long prof_len, prof_shift; |
| 43 | static int prof_on __read_mostly; | 43 | int prof_on __read_mostly; |
| 44 | static cpumask_t prof_cpu_mask = CPU_MASK_ALL; | 44 | static cpumask_t prof_cpu_mask = CPU_MASK_ALL; |
| 45 | #ifdef CONFIG_SMP | 45 | #ifdef CONFIG_SMP |
| 46 | static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); | 46 | static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); |
| @@ -51,9 +51,19 @@ static DEFINE_MUTEX(profile_flip_mutex); | |||
| 51 | static int __init profile_setup(char * str) | 51 | static int __init profile_setup(char * str) |
| 52 | { | 52 | { |
| 53 | static char __initdata schedstr[] = "schedule"; | 53 | static char __initdata schedstr[] = "schedule"; |
| 54 | static char __initdata sleepstr[] = "sleep"; | ||
| 54 | int par; | 55 | int par; |
| 55 | 56 | ||
| 56 | if (!strncmp(str, schedstr, strlen(schedstr))) { | 57 | if (!strncmp(str, sleepstr, strlen(sleepstr))) { |
| 58 | prof_on = SLEEP_PROFILING; | ||
| 59 | if (str[strlen(sleepstr)] == ',') | ||
| 60 | str += strlen(sleepstr) + 1; | ||
| 61 | if (get_option(&str, &par)) | ||
| 62 | prof_shift = par; | ||
| 63 | printk(KERN_INFO | ||
| 64 | "kernel sleep profiling enabled (shift: %ld)\n", | ||
| 65 | prof_shift); | ||
| 66 | } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { | ||
| 57 | prof_on = SCHED_PROFILING; | 67 | prof_on = SCHED_PROFILING; |
| 58 | if (str[strlen(schedstr)] == ',') | 68 | if (str[strlen(schedstr)] == ',') |
| 59 | str += strlen(schedstr) + 1; | 69 | str += strlen(schedstr) + 1; |
| @@ -204,7 +214,8 @@ EXPORT_SYMBOL_GPL(profile_event_unregister); | |||
| 204 | * positions to which hits are accounted during short intervals (e.g. | 214 | * positions to which hits are accounted during short intervals (e.g. |
| 205 | * several seconds) is usually very small. Exclusion from buffer | 215 | * several seconds) is usually very small. Exclusion from buffer |
| 206 | * flipping is provided by interrupt disablement (note that for | 216 | * flipping is provided by interrupt disablement (note that for |
| 207 | * SCHED_PROFILING profile_hit() may be called from process context). | 217 | * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from |
| 218 | * process context). | ||
| 208 | * The hash function is meant to be lightweight as opposed to strong, | 219 | * The hash function is meant to be lightweight as opposed to strong, |
| 209 | * and was vaguely inspired by ppc64 firmware-supported inverted | 220 | * and was vaguely inspired by ppc64 firmware-supported inverted |
| 210 | * pagetable hash functions, but uses a full hashtable full of finite | 221 | * pagetable hash functions, but uses a full hashtable full of finite |
| @@ -257,7 +268,7 @@ static void profile_discard_flip_buffers(void) | |||
| 257 | mutex_unlock(&profile_flip_mutex); | 268 | mutex_unlock(&profile_flip_mutex); |
| 258 | } | 269 | } |
| 259 | 270 | ||
| 260 | void profile_hit(int type, void *__pc) | 271 | void profile_hits(int type, void *__pc, unsigned int nr_hits) |
| 261 | { | 272 | { |
| 262 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; | 273 | unsigned long primary, secondary, flags, pc = (unsigned long)__pc; |
| 263 | int i, j, cpu; | 274 | int i, j, cpu; |
| @@ -274,21 +285,31 @@ void profile_hit(int type, void *__pc) | |||
| 274 | put_cpu(); | 285 | put_cpu(); |
| 275 | return; | 286 | return; |
| 276 | } | 287 | } |
| 288 | /* | ||
| 289 | * We buffer the global profiler buffer into a per-CPU | ||
| 290 | * queue and thus reduce the number of global (and possibly | ||
| 291 | * NUMA-alien) accesses. The write-queue is self-coalescing: | ||
| 292 | */ | ||
| 277 | local_irq_save(flags); | 293 | local_irq_save(flags); |
| 278 | do { | 294 | do { |
| 279 | for (j = 0; j < PROFILE_GRPSZ; ++j) { | 295 | for (j = 0; j < PROFILE_GRPSZ; ++j) { |
| 280 | if (hits[i + j].pc == pc) { | 296 | if (hits[i + j].pc == pc) { |
| 281 | hits[i + j].hits++; | 297 | hits[i + j].hits += nr_hits; |
| 282 | goto out; | 298 | goto out; |
| 283 | } else if (!hits[i + j].hits) { | 299 | } else if (!hits[i + j].hits) { |
| 284 | hits[i + j].pc = pc; | 300 | hits[i + j].pc = pc; |
| 285 | hits[i + j].hits = 1; | 301 | hits[i + j].hits = nr_hits; |
| 286 | goto out; | 302 | goto out; |
| 287 | } | 303 | } |
| 288 | } | 304 | } |
| 289 | i = (i + secondary) & (NR_PROFILE_HIT - 1); | 305 | i = (i + secondary) & (NR_PROFILE_HIT - 1); |
| 290 | } while (i != primary); | 306 | } while (i != primary); |
| 291 | atomic_inc(&prof_buffer[pc]); | 307 | |
| 308 | /* | ||
| 309 | * Add the current hit(s) and flush the write-queue out | ||
| 310 | * to the global buffer: | ||
| 311 | */ | ||
| 312 | atomic_add(nr_hits, &prof_buffer[pc]); | ||
| 292 | for (i = 0; i < NR_PROFILE_HIT; ++i) { | 313 | for (i = 0; i < NR_PROFILE_HIT; ++i) { |
| 293 | atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); | 314 | atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); |
| 294 | hits[i].pc = hits[i].hits = 0; | 315 | hits[i].pc = hits[i].hits = 0; |
| @@ -356,14 +377,14 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, | |||
| 356 | #define profile_flip_buffers() do { } while (0) | 377 | #define profile_flip_buffers() do { } while (0) |
| 357 | #define profile_discard_flip_buffers() do { } while (0) | 378 | #define profile_discard_flip_buffers() do { } while (0) |
| 358 | 379 | ||
| 359 | void profile_hit(int type, void *__pc) | 380 | void profile_hits(int type, void *__pc, unsigned int nr_hits) |
| 360 | { | 381 | { |
| 361 | unsigned long pc; | 382 | unsigned long pc; |
| 362 | 383 | ||
| 363 | if (prof_on != type || !prof_buffer) | 384 | if (prof_on != type || !prof_buffer) |
| 364 | return; | 385 | return; |
| 365 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; | 386 | pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; |
| 366 | atomic_inc(&prof_buffer[min(pc, prof_len - 1)]); | 387 | atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); |
| 367 | } | 388 | } |
| 368 | #endif /* !CONFIG_SMP */ | 389 | #endif /* !CONFIG_SMP */ |
| 369 | 390 | ||
