aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/futex.c2
-rw-r--r--kernel/kmod.c1
-rw-r--r--kernel/kprobes.c6
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/perf_counter.c322
-rw-r--r--kernel/pid.c7
-rw-r--r--kernel/ptrace.c4
-rw-r--r--kernel/rcutree.c3
-rw-r--r--kernel/resource.c2
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sysctl.c13
-rw-r--r--kernel/time/timer_stats.c16
-rw-r--r--kernel/timer.c2
-rw-r--r--kernel/trace/Kconfig6
-rw-r--r--kernel/trace/ftrace.c60
-rw-r--r--kernel/trace/ring_buffer.c11
-rw-r--r--kernel/trace/trace.c23
-rw-r--r--kernel/trace/trace.h7
-rw-r--r--kernel/trace/trace_event_types.h3
-rw-r--r--kernel/trace/trace_events.c28
-rw-r--r--kernel/trace/trace_functions.c3
-rw-r--r--kernel/trace/trace_output.c3
-rw-r--r--kernel/trace/trace_printk.c26
-rw-r--r--kernel/trace/trace_stack.c4
-rw-r--r--kernel/trace/trace_stat.c6
29 files changed, 421 insertions, 166 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 780c8dcf451..2093a691f1c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -96,6 +96,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
96obj-$(CONFIG_FUNCTION_TRACER) += trace/ 96obj-$(CONFIG_FUNCTION_TRACER) += trace/
97obj-$(CONFIG_TRACING) += trace/ 97obj-$(CONFIG_TRACING) += trace/
98obj-$(CONFIG_X86_DS) += trace/ 98obj-$(CONFIG_X86_DS) += trace/
99obj-$(CONFIG_RING_BUFFER) += trace/
99obj-$(CONFIG_SMP) += sched_cpupri.o 100obj-$(CONFIG_SMP) += sched_cpupri.o
100obj-$(CONFIG_SLOW_WORK) += slow-work.o 101obj-$(CONFIG_SLOW_WORK) += slow-work.o
101obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o 102obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
diff --git a/kernel/acct.c b/kernel/acct.c
index 7afa3156416..9f3391090b3 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -215,6 +215,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
215static int acct_on(char *name) 215static int acct_on(char *name)
216{ 216{
217 struct file *file; 217 struct file *file;
218 struct vfsmount *mnt;
218 int error; 219 int error;
219 struct pid_namespace *ns; 220 struct pid_namespace *ns;
220 struct bsd_acct_struct *acct = NULL; 221 struct bsd_acct_struct *acct = NULL;
@@ -256,11 +257,12 @@ static int acct_on(char *name)
256 acct = NULL; 257 acct = NULL;
257 } 258 }
258 259
259 mnt_pin(file->f_path.mnt); 260 mnt = file->f_path.mnt;
261 mnt_pin(mnt);
260 acct_file_reopen(ns->bacct, file, ns); 262 acct_file_reopen(ns->bacct, file, ns);
261 spin_unlock(&acct_lock); 263 spin_unlock(&acct_lock);
262 264
263 mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ 265 mntput(mnt); /* it's pinned, now give up active reference */
264 kfree(acct); 266 kfree(acct);
265 267
266 return 0; 268 return 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f0dd5..869dc221733 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
12#include <linux/completion.h> 12#include <linux/completion.h>
13#include <linux/personality.h> 13#include <linux/personality.h>
14#include <linux/tty.h> 14#include <linux/tty.h>
15#include <linux/mnt_namespace.h>
16#include <linux/iocontext.h> 15#include <linux/iocontext.h>
17#include <linux/key.h> 16#include <linux/key.h>
18#include <linux/security.h> 17#include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 467746b3f0a..bd295922887 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/vmalloc.h> 18#include <linux/vmalloc.h>
19#include <linux/completion.h> 19#include <linux/completion.h>
20#include <linux/mnt_namespace.h>
21#include <linux/personality.h> 20#include <linux/personality.h>
22#include <linux/mempolicy.h> 21#include <linux/mempolicy.h>
23#include <linux/sem.h> 22#include <linux/sem.h>
diff --git a/kernel/futex.c b/kernel/futex.c
index 1c337112335..794c862125f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -299,7 +299,7 @@ void put_futex_key(int fshared, union futex_key *key)
299static int fault_in_user_writeable(u32 __user *uaddr) 299static int fault_in_user_writeable(u32 __user *uaddr)
300{ 300{
301 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr, 301 int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
302 sizeof(*uaddr), 1, 0, NULL, NULL); 302 1, 1, 0, NULL, NULL);
303 return ret < 0 ? ret : 0; 303 return ret < 0 ? ret : 0;
304} 304}
305 305
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bedb2bf..385c31a1bdb 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
24#include <linux/unistd.h> 24#include <linux/unistd.h>
25#include <linux/kmod.h> 25#include <linux/kmod.h>
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/mnt_namespace.h>
28#include <linux/completion.h> 27#include <linux/completion.h>
29#include <linux/file.h> 28#include <linux/file.h>
30#include <linux/fdtable.h> 29#include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d..16b5739c516 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
237{ 237{
238 struct kprobe_insn_page *kip; 238 struct kprobe_insn_page *kip;
239 struct hlist_node *pos, *next; 239 struct hlist_node *pos, *next;
240 int safety;
241 240
242 /* Ensure no-one is preepmted on the garbages */ 241 /* Ensure no-one is preepmted on the garbages */
243 mutex_unlock(&kprobe_insn_mutex); 242 if (check_safety())
244 safety = check_safety();
245 mutex_lock(&kprobe_insn_mutex);
246 if (safety != 0)
247 return -EAGAIN; 243 return -EAGAIN;
248 244
249 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) { 245 hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2..0a049837008 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2451 return ret; 2451 return ret;
2452 } 2452 }
2453 if (ret > 0) { 2453 if (ret > 0) {
2454 printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, " 2454 printk(KERN_WARNING
2455 "it should follow 0/-E convention\n" 2455"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
2456 KERN_WARNING "%s: loading module anyway...\n", 2456"%s: loading module anyway...\n",
2457 __func__, mod->name, ret, 2457 __func__, mod->name, ret,
2458 __func__); 2458 __func__);
2459 dump_stack(); 2459 dump_stack();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1a933a221ea..a641eb753b8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
236 236
237 list_add_rcu(&counter->event_entry, &ctx->event_list); 237 list_add_rcu(&counter->event_entry, &ctx->event_list);
238 ctx->nr_counters++; 238 ctx->nr_counters++;
239 if (counter->attr.inherit_stat)
240 ctx->nr_stat++;
239} 241}
240 242
241/* 243/*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
250 if (list_empty(&counter->list_entry)) 252 if (list_empty(&counter->list_entry))
251 return; 253 return;
252 ctx->nr_counters--; 254 ctx->nr_counters--;
255 if (counter->attr.inherit_stat)
256 ctx->nr_stat--;
253 257
254 list_del_init(&counter->list_entry); 258 list_del_init(&counter->list_entry);
255 list_del_rcu(&counter->event_entry); 259 list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,81 @@ static int context_equiv(struct perf_counter_context *ctx1,
1006 && !ctx1->pin_count && !ctx2->pin_count; 1010 && !ctx1->pin_count && !ctx2->pin_count;
1007} 1011}
1008 1012
1013static void __perf_counter_read(void *counter);
1014
1015static void __perf_counter_sync_stat(struct perf_counter *counter,
1016 struct perf_counter *next_counter)
1017{
1018 u64 value;
1019
1020 if (!counter->attr.inherit_stat)
1021 return;
1022
1023 /*
1024 * Update the counter value, we cannot use perf_counter_read()
1025 * because we're in the middle of a context switch and have IRQs
1026 * disabled, which upsets smp_call_function_single(), however
1027 * we know the counter must be on the current CPU, therefore we
1028 * don't need to use it.
1029 */
1030 switch (counter->state) {
1031 case PERF_COUNTER_STATE_ACTIVE:
1032 __perf_counter_read(counter);
1033 break;
1034
1035 case PERF_COUNTER_STATE_INACTIVE:
1036 update_counter_times(counter);
1037 break;
1038
1039 default:
1040 break;
1041 }
1042
1043 /*
1044 * In order to keep per-task stats reliable we need to flip the counter
1045 * values when we flip the contexts.
1046 */
1047 value = atomic64_read(&next_counter->count);
1048 value = atomic64_xchg(&counter->count, value);
1049 atomic64_set(&next_counter->count, value);
1050
1051 swap(counter->total_time_enabled, next_counter->total_time_enabled);
1052 swap(counter->total_time_running, next_counter->total_time_running);
1053
1054 /*
1055 * Since we swizzled the values, update the user visible data too.
1056 */
1057 perf_counter_update_userpage(counter);
1058 perf_counter_update_userpage(next_counter);
1059}
1060
1061#define list_next_entry(pos, member) \
1062 list_entry(pos->member.next, typeof(*pos), member)
1063
1064static void perf_counter_sync_stat(struct perf_counter_context *ctx,
1065 struct perf_counter_context *next_ctx)
1066{
1067 struct perf_counter *counter, *next_counter;
1068
1069 if (!ctx->nr_stat)
1070 return;
1071
1072 counter = list_first_entry(&ctx->event_list,
1073 struct perf_counter, event_entry);
1074
1075 next_counter = list_first_entry(&next_ctx->event_list,
1076 struct perf_counter, event_entry);
1077
1078 while (&counter->event_entry != &ctx->event_list &&
1079 &next_counter->event_entry != &next_ctx->event_list) {
1080
1081 __perf_counter_sync_stat(counter, next_counter);
1082
1083 counter = list_next_entry(counter, event_entry);
1084 next_counter = list_next_entry(counter, event_entry);
1085 }
1086}
1087
1009/* 1088/*
1010 * Called from scheduler to remove the counters of the current task, 1089 * Called from scheduler to remove the counters of the current task,
1011 * with interrupts disabled. 1090 * with interrupts disabled.
@@ -1061,6 +1140,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
1061 ctx->task = next; 1140 ctx->task = next;
1062 next_ctx->task = task; 1141 next_ctx->task = task;
1063 do_switch = 0; 1142 do_switch = 0;
1143
1144 perf_counter_sync_stat(ctx, next_ctx);
1064 } 1145 }
1065 spin_unlock(&next_ctx->lock); 1146 spin_unlock(&next_ctx->lock);
1066 spin_unlock(&ctx->lock); 1147 spin_unlock(&ctx->lock);
@@ -1348,9 +1429,56 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
1348} 1429}
1349 1430
1350/* 1431/*
1432 * Enable all of a task's counters that have been marked enable-on-exec.
1433 * This expects task == current.
1434 */
1435static void perf_counter_enable_on_exec(struct task_struct *task)
1436{
1437 struct perf_counter_context *ctx;
1438 struct perf_counter *counter;
1439 unsigned long flags;
1440 int enabled = 0;
1441
1442 local_irq_save(flags);
1443 ctx = task->perf_counter_ctxp;
1444 if (!ctx || !ctx->nr_counters)
1445 goto out;
1446
1447 __perf_counter_task_sched_out(ctx);
1448
1449 spin_lock(&ctx->lock);
1450
1451 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
1452 if (!counter->attr.enable_on_exec)
1453 continue;
1454 counter->attr.enable_on_exec = 0;
1455 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
1456 continue;
1457 counter->state = PERF_COUNTER_STATE_INACTIVE;
1458 counter->tstamp_enabled =
1459 ctx->time - counter->total_time_enabled;
1460 enabled = 1;
1461 }
1462
1463 /*
1464 * Unclone this context if we enabled any counter.
1465 */
1466 if (enabled && ctx->parent_ctx) {
1467 put_ctx(ctx->parent_ctx);
1468 ctx->parent_ctx = NULL;
1469 }
1470
1471 spin_unlock(&ctx->lock);
1472
1473 perf_counter_task_sched_in(task, smp_processor_id());
1474 out:
1475 local_irq_restore(flags);
1476}
1477
1478/*
1351 * Cross CPU call to read the hardware counter 1479 * Cross CPU call to read the hardware counter
1352 */ 1480 */
1353static void __read(void *info) 1481static void __perf_counter_read(void *info)
1354{ 1482{
1355 struct perf_counter *counter = info; 1483 struct perf_counter *counter = info;
1356 struct perf_counter_context *ctx = counter->ctx; 1484 struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1500,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
1372 */ 1500 */
1373 if (counter->state == PERF_COUNTER_STATE_ACTIVE) { 1501 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
1374 smp_call_function_single(counter->oncpu, 1502 smp_call_function_single(counter->oncpu,
1375 __read, counter, 1); 1503 __perf_counter_read, counter, 1);
1376 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { 1504 } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
1377 update_counter_times(counter); 1505 update_counter_times(counter);
1378 } 1506 }
@@ -1508,11 +1636,13 @@ static void free_counter(struct perf_counter *counter)
1508{ 1636{
1509 perf_pending_sync(counter); 1637 perf_pending_sync(counter);
1510 1638
1511 atomic_dec(&nr_counters); 1639 if (!counter->parent) {
1512 if (counter->attr.mmap) 1640 atomic_dec(&nr_counters);
1513 atomic_dec(&nr_mmap_counters); 1641 if (counter->attr.mmap)
1514 if (counter->attr.comm) 1642 atomic_dec(&nr_mmap_counters);
1515 atomic_dec(&nr_comm_counters); 1643 if (counter->attr.comm)
1644 atomic_dec(&nr_comm_counters);
1645 }
1516 1646
1517 if (counter->destroy) 1647 if (counter->destroy)
1518 counter->destroy(counter); 1648 counter->destroy(counter);
@@ -1751,6 +1881,14 @@ int perf_counter_task_disable(void)
1751 return 0; 1881 return 0;
1752} 1882}
1753 1883
1884static int perf_counter_index(struct perf_counter *counter)
1885{
1886 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
1887 return 0;
1888
1889 return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
1890}
1891
1754/* 1892/*
1755 * Callers need to ensure there can be no nesting of this function, otherwise 1893 * Callers need to ensure there can be no nesting of this function, otherwise
1756 * the seqlock logic goes bad. We can not serialize this because the arch 1894 * the seqlock logic goes bad. We can not serialize this because the arch
@@ -1775,11 +1913,17 @@ void perf_counter_update_userpage(struct perf_counter *counter)
1775 preempt_disable(); 1913 preempt_disable();
1776 ++userpg->lock; 1914 ++userpg->lock;
1777 barrier(); 1915 barrier();
1778 userpg->index = counter->hw.idx; 1916 userpg->index = perf_counter_index(counter);
1779 userpg->offset = atomic64_read(&counter->count); 1917 userpg->offset = atomic64_read(&counter->count);
1780 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 1918 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
1781 userpg->offset -= atomic64_read(&counter->hw.prev_count); 1919 userpg->offset -= atomic64_read(&counter->hw.prev_count);
1782 1920
1921 userpg->time_enabled = counter->total_time_enabled +
1922 atomic64_read(&counter->child_total_time_enabled);
1923
1924 userpg->time_running = counter->total_time_running +
1925 atomic64_read(&counter->child_total_time_running);
1926
1783 barrier(); 1927 barrier();
1784 ++userpg->lock; 1928 ++userpg->lock;
1785 preempt_enable(); 1929 preempt_enable();
@@ -1876,7 +2020,7 @@ fail:
1876 2020
1877static void perf_mmap_free_page(unsigned long addr) 2021static void perf_mmap_free_page(unsigned long addr)
1878{ 2022{
1879 struct page *page = virt_to_page(addr); 2023 struct page *page = virt_to_page((void *)addr);
1880 2024
1881 page->mapping = NULL; 2025 page->mapping = NULL;
1882 __free_page(page); 2026 __free_page(page);
@@ -2483,15 +2627,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2483 u32 cpu, reserved; 2627 u32 cpu, reserved;
2484 } cpu_entry; 2628 } cpu_entry;
2485 2629
2486 header.type = 0; 2630 header.type = PERF_EVENT_SAMPLE;
2487 header.size = sizeof(header); 2631 header.size = sizeof(header);
2488 2632
2489 header.misc = PERF_EVENT_MISC_OVERFLOW; 2633 header.misc = 0;
2490 header.misc |= perf_misc_flags(data->regs); 2634 header.misc |= perf_misc_flags(data->regs);
2491 2635
2492 if (sample_type & PERF_SAMPLE_IP) { 2636 if (sample_type & PERF_SAMPLE_IP) {
2493 ip = perf_instruction_pointer(data->regs); 2637 ip = perf_instruction_pointer(data->regs);
2494 header.type |= PERF_SAMPLE_IP;
2495 header.size += sizeof(ip); 2638 header.size += sizeof(ip);
2496 } 2639 }
2497 2640
@@ -2500,7 +2643,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2500 tid_entry.pid = perf_counter_pid(counter, current); 2643 tid_entry.pid = perf_counter_pid(counter, current);
2501 tid_entry.tid = perf_counter_tid(counter, current); 2644 tid_entry.tid = perf_counter_tid(counter, current);
2502 2645
2503 header.type |= PERF_SAMPLE_TID;
2504 header.size += sizeof(tid_entry); 2646 header.size += sizeof(tid_entry);
2505 } 2647 }
2506 2648
@@ -2510,34 +2652,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2510 */ 2652 */
2511 time = sched_clock(); 2653 time = sched_clock();
2512 2654
2513 header.type |= PERF_SAMPLE_TIME;
2514 header.size += sizeof(u64); 2655 header.size += sizeof(u64);
2515 } 2656 }
2516 2657
2517 if (sample_type & PERF_SAMPLE_ADDR) { 2658 if (sample_type & PERF_SAMPLE_ADDR)
2518 header.type |= PERF_SAMPLE_ADDR;
2519 header.size += sizeof(u64); 2659 header.size += sizeof(u64);
2520 }
2521 2660
2522 if (sample_type & PERF_SAMPLE_ID) { 2661 if (sample_type & PERF_SAMPLE_ID)
2523 header.type |= PERF_SAMPLE_ID;
2524 header.size += sizeof(u64); 2662 header.size += sizeof(u64);
2525 }
2526 2663
2527 if (sample_type & PERF_SAMPLE_CPU) { 2664 if (sample_type & PERF_SAMPLE_CPU) {
2528 header.type |= PERF_SAMPLE_CPU;
2529 header.size += sizeof(cpu_entry); 2665 header.size += sizeof(cpu_entry);
2530 2666
2531 cpu_entry.cpu = raw_smp_processor_id(); 2667 cpu_entry.cpu = raw_smp_processor_id();
2532 } 2668 }
2533 2669
2534 if (sample_type & PERF_SAMPLE_PERIOD) { 2670 if (sample_type & PERF_SAMPLE_PERIOD)
2535 header.type |= PERF_SAMPLE_PERIOD;
2536 header.size += sizeof(u64); 2671 header.size += sizeof(u64);
2537 }
2538 2672
2539 if (sample_type & PERF_SAMPLE_GROUP) { 2673 if (sample_type & PERF_SAMPLE_GROUP) {
2540 header.type |= PERF_SAMPLE_GROUP;
2541 header.size += sizeof(u64) + 2674 header.size += sizeof(u64) +
2542 counter->nr_siblings * sizeof(group_entry); 2675 counter->nr_siblings * sizeof(group_entry);
2543 } 2676 }
@@ -2547,10 +2680,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2547 2680
2548 if (callchain) { 2681 if (callchain) {
2549 callchain_size = (1 + callchain->nr) * sizeof(u64); 2682 callchain_size = (1 + callchain->nr) * sizeof(u64);
2550
2551 header.type |= PERF_SAMPLE_CALLCHAIN;
2552 header.size += callchain_size; 2683 header.size += callchain_size;
2553 } 2684 } else
2685 header.size += sizeof(u64);
2554 } 2686 }
2555 2687
2556 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2688 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2601,13 +2733,79 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2601 } 2733 }
2602 } 2734 }
2603 2735
2604 if (callchain) 2736 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2605 perf_output_copy(&handle, callchain, callchain_size); 2737 if (callchain)
2738 perf_output_copy(&handle, callchain, callchain_size);
2739 else {
2740 u64 nr = 0;
2741 perf_output_put(&handle, nr);
2742 }
2743 }
2606 2744
2607 perf_output_end(&handle); 2745 perf_output_end(&handle);
2608} 2746}
2609 2747
2610/* 2748/*
2749 * read event
2750 */
2751
2752struct perf_read_event {
2753 struct perf_event_header header;
2754
2755 u32 pid;
2756 u32 tid;
2757 u64 value;
2758 u64 format[3];
2759};
2760
2761static void
2762perf_counter_read_event(struct perf_counter *counter,
2763 struct task_struct *task)
2764{
2765 struct perf_output_handle handle;
2766 struct perf_read_event event = {
2767 .header = {
2768 .type = PERF_EVENT_READ,
2769 .misc = 0,
2770 .size = sizeof(event) - sizeof(event.format),
2771 },
2772 .pid = perf_counter_pid(counter, task),
2773 .tid = perf_counter_tid(counter, task),
2774 .value = atomic64_read(&counter->count),
2775 };
2776 int ret, i = 0;
2777
2778 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2779 event.header.size += sizeof(u64);
2780 event.format[i++] = counter->total_time_enabled;
2781 }
2782
2783 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2784 event.header.size += sizeof(u64);
2785 event.format[i++] = counter->total_time_running;
2786 }
2787
2788 if (counter->attr.read_format & PERF_FORMAT_ID) {
2789 u64 id;
2790
2791 event.header.size += sizeof(u64);
2792 if (counter->parent)
2793 id = counter->parent->id;
2794 else
2795 id = counter->id;
2796
2797 event.format[i++] = id;
2798 }
2799
2800 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2801 if (ret)
2802 return;
2803
2804 perf_output_copy(&handle, &event, event.header.size);
2805 perf_output_end(&handle);
2806}
2807
2808/*
2611 * fork tracking 2809 * fork tracking
2612 */ 2810 */
2613 2811
@@ -2798,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task)
2798{ 2996{
2799 struct perf_comm_event comm_event; 2997 struct perf_comm_event comm_event;
2800 2998
2999 if (task->perf_counter_ctxp)
3000 perf_counter_enable_on_exec(task);
3001
2801 if (!atomic_read(&nr_comm_counters)) 3002 if (!atomic_read(&nr_comm_counters))
2802 return; 3003 return;
2803 3004
@@ -3317,8 +3518,8 @@ out:
3317 put_cpu_var(perf_cpu_context); 3518 put_cpu_var(perf_cpu_context);
3318} 3519}
3319 3520
3320void 3521void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3321perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) 3522 struct pt_regs *regs, u64 addr)
3322{ 3523{
3323 struct perf_sample_data data = { 3524 struct perf_sample_data data = {
3324 .regs = regs, 3525 .regs = regs,
@@ -3509,9 +3710,21 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3509} 3710}
3510#endif 3711#endif
3511 3712
3713atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
3714
3715static void sw_perf_counter_destroy(struct perf_counter *counter)
3716{
3717 u64 event = counter->attr.config;
3718
3719 WARN_ON(counter->parent);
3720
3721 atomic_dec(&perf_swcounter_enabled[event]);
3722}
3723
3512static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) 3724static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3513{ 3725{
3514 const struct pmu *pmu = NULL; 3726 const struct pmu *pmu = NULL;
3727 u64 event = counter->attr.config;
3515 3728
3516 /* 3729 /*
3517 * Software counters (currently) can't in general distinguish 3730 * Software counters (currently) can't in general distinguish
@@ -3520,7 +3733,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3520 * to be kernel events, and page faults are never hypervisor 3733 * to be kernel events, and page faults are never hypervisor
3521 * events. 3734 * events.
3522 */ 3735 */
3523 switch (counter->attr.config) { 3736 switch (event) {
3524 case PERF_COUNT_SW_CPU_CLOCK: 3737 case PERF_COUNT_SW_CPU_CLOCK:
3525 pmu = &perf_ops_cpu_clock; 3738 pmu = &perf_ops_cpu_clock;
3526 3739
@@ -3541,6 +3754,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
3541 case PERF_COUNT_SW_PAGE_FAULTS_MAJ: 3754 case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
3542 case PERF_COUNT_SW_CONTEXT_SWITCHES: 3755 case PERF_COUNT_SW_CONTEXT_SWITCHES:
3543 case PERF_COUNT_SW_CPU_MIGRATIONS: 3756 case PERF_COUNT_SW_CPU_MIGRATIONS:
3757 if (!counter->parent) {
3758 atomic_inc(&perf_swcounter_enabled[event]);
3759 counter->destroy = sw_perf_counter_destroy;
3760 }
3544 pmu = &perf_ops_generic; 3761 pmu = &perf_ops_generic;
3545 break; 3762 break;
3546 } 3763 }
@@ -3556,6 +3773,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3556 int cpu, 3773 int cpu,
3557 struct perf_counter_context *ctx, 3774 struct perf_counter_context *ctx,
3558 struct perf_counter *group_leader, 3775 struct perf_counter *group_leader,
3776 struct perf_counter *parent_counter,
3559 gfp_t gfpflags) 3777 gfp_t gfpflags)
3560{ 3778{
3561 const struct pmu *pmu; 3779 const struct pmu *pmu;
@@ -3591,6 +3809,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3591 counter->ctx = ctx; 3809 counter->ctx = ctx;
3592 counter->oncpu = -1; 3810 counter->oncpu = -1;
3593 3811
3812 counter->parent = parent_counter;
3813
3594 counter->ns = get_pid_ns(current->nsproxy->pid_ns); 3814 counter->ns = get_pid_ns(current->nsproxy->pid_ns);
3595 counter->id = atomic64_inc_return(&perf_counter_id); 3815 counter->id = atomic64_inc_return(&perf_counter_id);
3596 3816
@@ -3648,11 +3868,13 @@ done:
3648 3868
3649 counter->pmu = pmu; 3869 counter->pmu = pmu;
3650 3870
3651 atomic_inc(&nr_counters); 3871 if (!counter->parent) {
3652 if (counter->attr.mmap) 3872 atomic_inc(&nr_counters);
3653 atomic_inc(&nr_mmap_counters); 3873 if (counter->attr.mmap)
3654 if (counter->attr.comm) 3874 atomic_inc(&nr_mmap_counters);
3655 atomic_inc(&nr_comm_counters); 3875 if (counter->attr.comm)
3876 atomic_inc(&nr_comm_counters);
3877 }
3656 3878
3657 return counter; 3879 return counter;
3658} 3880}
@@ -3815,7 +4037,7 @@ SYSCALL_DEFINE5(perf_counter_open,
3815 } 4037 }
3816 4038
3817 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, 4039 counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
3818 GFP_KERNEL); 4040 NULL, GFP_KERNEL);
3819 ret = PTR_ERR(counter); 4041 ret = PTR_ERR(counter);
3820 if (IS_ERR(counter)) 4042 if (IS_ERR(counter))
3821 goto err_put_context; 4043 goto err_put_context;
@@ -3881,7 +4103,8 @@ inherit_counter(struct perf_counter *parent_counter,
3881 4103
3882 child_counter = perf_counter_alloc(&parent_counter->attr, 4104 child_counter = perf_counter_alloc(&parent_counter->attr,
3883 parent_counter->cpu, child_ctx, 4105 parent_counter->cpu, child_ctx,
3884 group_leader, GFP_KERNEL); 4106 group_leader, parent_counter,
4107 GFP_KERNEL);
3885 if (IS_ERR(child_counter)) 4108 if (IS_ERR(child_counter))
3886 return child_counter; 4109 return child_counter;
3887 get_ctx(child_ctx); 4110 get_ctx(child_ctx);
@@ -3904,12 +4127,6 @@ inherit_counter(struct perf_counter *parent_counter,
3904 */ 4127 */
3905 add_counter_to_ctx(child_counter, child_ctx); 4128 add_counter_to_ctx(child_counter, child_ctx);
3906 4129
3907 child_counter->parent = parent_counter;
3908 /*
3909 * inherit into child's child as well:
3910 */
3911 child_counter->attr.inherit = 1;
3912
3913 /* 4130 /*
3914 * Get a reference to the parent filp - we will fput it 4131 * Get a reference to the parent filp - we will fput it
3915 * when the child counter exits. This is safe to do because 4132 * when the child counter exits. This is safe to do because
@@ -3953,10 +4170,14 @@ static int inherit_group(struct perf_counter *parent_counter,
3953} 4170}
3954 4171
3955static void sync_child_counter(struct perf_counter *child_counter, 4172static void sync_child_counter(struct perf_counter *child_counter,
3956 struct perf_counter *parent_counter) 4173 struct task_struct *child)
3957{ 4174{
4175 struct perf_counter *parent_counter = child_counter->parent;
3958 u64 child_val; 4176 u64 child_val;
3959 4177
4178 if (child_counter->attr.inherit_stat)
4179 perf_counter_read_event(child_counter, child);
4180
3960 child_val = atomic64_read(&child_counter->count); 4181 child_val = atomic64_read(&child_counter->count);
3961 4182
3962 /* 4183 /*
@@ -3985,7 +4206,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
3985 4206
3986static void 4207static void
3987__perf_counter_exit_task(struct perf_counter *child_counter, 4208__perf_counter_exit_task(struct perf_counter *child_counter,
3988 struct perf_counter_context *child_ctx) 4209 struct perf_counter_context *child_ctx,
4210 struct task_struct *child)
3989{ 4211{
3990 struct perf_counter *parent_counter; 4212 struct perf_counter *parent_counter;
3991 4213
@@ -3999,7 +4221,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter,
3999 * counters need to be zapped - but otherwise linger. 4221 * counters need to be zapped - but otherwise linger.
4000 */ 4222 */
4001 if (parent_counter) { 4223 if (parent_counter) {
4002 sync_child_counter(child_counter, parent_counter); 4224 sync_child_counter(child_counter, child);
4003 free_counter(child_counter); 4225 free_counter(child_counter);
4004 } 4226 }
4005} 4227}
@@ -4061,7 +4283,7 @@ void perf_counter_exit_task(struct task_struct *child)
4061again: 4283again:
4062 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, 4284 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
4063 list_entry) 4285 list_entry)
4064 __perf_counter_exit_task(child_counter, child_ctx); 4286 __perf_counter_exit_task(child_counter, child_ctx, child);
4065 4287
4066 /* 4288 /*
4067 * If the last counter was a group counter, it will have appended all 4289 * If the last counter was a group counter, it will have appended all
diff --git a/kernel/pid.c b/kernel/pid.c
index 31310b5d3f5..5fa1db48d8b 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
36#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
37#include <linux/init_task.h> 37#include <linux/init_task.h>
38#include <linux/syscalls.h> 38#include <linux/syscalls.h>
39#include <linux/kmemleak.h>
39 40
40#define pid_hashfn(nr, ns) \ 41#define pid_hashfn(nr, ns) \
41 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) 42 hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
@@ -512,6 +513,12 @@ void __init pidhash_init(void)
512 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash))); 513 pid_hash = alloc_bootmem(pidhash_size * sizeof(*(pid_hash)));
513 if (!pid_hash) 514 if (!pid_hash)
514 panic("Could not alloc pidhash!\n"); 515 panic("Could not alloc pidhash!\n");
516 /*
517 * pid_hash contains references to allocated struct pid objects and it
518 * must be scanned by kmemleak to avoid false positives.
519 */
520 kmemleak_alloc(pid_hash, pidhash_size * sizeof(*(pid_hash)), 0,
521 GFP_KERNEL);
515 for (i = 0; i < pidhash_size; i++) 522 for (i = 0; i < pidhash_size; i++)
516 INIT_HLIST_HEAD(&pid_hash[i]); 523 INIT_HLIST_HEAD(&pid_hash[i]);
517} 524}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61c78b2c07b..082c320e4db 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
181 * interference; SUID, SGID and LSM creds get determined differently 181 * interference; SUID, SGID and LSM creds get determined differently
182 * under ptrace. 182 * under ptrace.
183 */ 183 */
184 retval = mutex_lock_interruptible(&task->cred_guard_mutex); 184 retval = -ERESTARTNOINTR;
185 if (retval < 0) 185 if (mutex_lock_interruptible(&task->cred_guard_mutex))
186 goto out; 186 goto out;
187 187
188 task_lock(task); 188 task_lock(task);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d2..7717b95c202 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
1533 int j; 1533 int j;
1534 struct rcu_node *rnp; 1534 struct rcu_node *rnp;
1535 1535
1536 printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n"); 1536 printk(KERN_INFO "Hierarchical RCU implementation.\n");
1537#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 1537#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1538 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); 1538 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1539#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 1539#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
1546 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i); 1546 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
1547 /* Register notifier for non-boot CPUs */ 1547 /* Register notifier for non-boot CPUs */
1548 register_cpu_notifier(&rcu_nb); 1548 register_cpu_notifier(&rcu_nb);
1549 printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
1550} 1549}
1551 1550
1552module_param(blimit, int, 0); 1551module_param(blimit, int, 0);
diff --git a/kernel/resource.c b/kernel/resource.c
index ac5f3a36923..78b087221c1 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -787,7 +787,7 @@ static int __init reserve_setup(char *str)
787 static struct resource reserve[MAXRESERVE]; 787 static struct resource reserve[MAXRESERVE];
788 788
789 for (;;) { 789 for (;;) {
790 int io_start, io_num; 790 unsigned int io_start, io_num;
791 int x = reserved; 791 int x = reserved;
792 792
793 if (get_option (&str, &io_start) != 2) 793 if (get_option (&str, &io_start) != 2)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e..01f55ada359 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield)
6541 return 0; 6541 return 0;
6542} 6542}
6543 6543
6544static inline int should_resched(void)
6545{
6546 return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
6547}
6548
6544static void __cond_resched(void) 6549static void __cond_resched(void)
6545{ 6550{
6546#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP 6551#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6565,7 @@ static void __cond_resched(void)
6560 6565
6561int __sched _cond_resched(void) 6566int __sched _cond_resched(void)
6562{ 6567{
6563 if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) && 6568 if (should_resched()) {
6564 system_state == SYSTEM_RUNNING) {
6565 __cond_resched(); 6569 __cond_resched();
6566 return 1; 6570 return 1;
6567 } 6571 }
@@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched);
6579 */ 6583 */
6580int cond_resched_lock(spinlock_t *lock) 6584int cond_resched_lock(spinlock_t *lock)
6581{ 6585{
6582 int resched = need_resched() && system_state == SYSTEM_RUNNING; 6586 int resched = should_resched();
6583 int ret = 0; 6587 int ret = 0;
6584 6588
6585 if (spin_needbreak(lock) || resched) { 6589 if (spin_needbreak(lock) || resched) {
6586 spin_unlock(lock); 6590 spin_unlock(lock);
6587 if (resched && need_resched()) 6591 if (resched)
6588 __cond_resched(); 6592 __cond_resched();
6589 else 6593 else
6590 cpu_relax(); 6594 cpu_relax();
@@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void)
6599{ 6603{
6600 BUG_ON(!in_softirq()); 6604 BUG_ON(!in_softirq());
6601 6605
6602 if (need_resched() && system_state == SYSTEM_RUNNING) { 6606 if (should_resched()) {
6603 local_bh_enable(); 6607 local_bh_enable();
6604 __cond_resched(); 6608 __cond_resched();
6605 local_bh_disable(); 6609 local_bh_disable();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 62e4ff9968b..98e02328c67 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -335,7 +335,10 @@ static struct ctl_table kern_table[] = {
335 .data = &sysctl_timer_migration, 335 .data = &sysctl_timer_migration,
336 .maxlen = sizeof(unsigned int), 336 .maxlen = sizeof(unsigned int),
337 .mode = 0644, 337 .mode = 0644,
338 .proc_handler = &proc_dointvec, 338 .proc_handler = &proc_dointvec_minmax,
339 .strategy = &sysctl_intvec,
340 .extra1 = &zero,
341 .extra2 = &one,
339 }, 342 },
340#endif 343#endif
341 { 344 {
@@ -744,6 +747,14 @@ static struct ctl_table kern_table[] = {
744 .proc_handler = &proc_dointvec, 747 .proc_handler = &proc_dointvec,
745 }, 748 },
746 { 749 {
750 .ctl_name = CTL_UNNUMBERED,
751 .procname = "panic_on_io_nmi",
752 .data = &panic_on_io_nmi,
753 .maxlen = sizeof(int),
754 .mode = 0644,
755 .proc_handler = &proc_dointvec,
756 },
757 {
747 .ctl_name = KERN_BOOTLOADER_TYPE, 758 .ctl_name = KERN_BOOTLOADER_TYPE,
748 .procname = "bootloader_type", 759 .procname = "bootloader_type",
749 .data = &bootloader_type, 760 .data = &bootloader_type,
diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c
index c994530d166..4cde8b9c716 100644
--- a/kernel/time/timer_stats.c
+++ b/kernel/time/timer_stats.c
@@ -96,7 +96,7 @@ static DEFINE_MUTEX(show_mutex);
96/* 96/*
97 * Collection status, active/inactive: 97 * Collection status, active/inactive:
98 */ 98 */
99static int __read_mostly active; 99int __read_mostly timer_stats_active;
100 100
101/* 101/*
102 * Beginning/end timestamps of measurement: 102 * Beginning/end timestamps of measurement:
@@ -242,7 +242,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
242 struct entry *entry, input; 242 struct entry *entry, input;
243 unsigned long flags; 243 unsigned long flags;
244 244
245 if (likely(!active)) 245 if (likely(!timer_stats_active))
246 return; 246 return;
247 247
248 lock = &per_cpu(lookup_lock, raw_smp_processor_id()); 248 lock = &per_cpu(lookup_lock, raw_smp_processor_id());
@@ -254,7 +254,7 @@ void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
254 input.timer_flag = timer_flag; 254 input.timer_flag = timer_flag;
255 255
256 spin_lock_irqsave(lock, flags); 256 spin_lock_irqsave(lock, flags);
257 if (!active) 257 if (!timer_stats_active)
258 goto out_unlock; 258 goto out_unlock;
259 259
260 entry = tstat_lookup(&input, comm); 260 entry = tstat_lookup(&input, comm);
@@ -290,7 +290,7 @@ static int tstats_show(struct seq_file *m, void *v)
290 /* 290 /*
291 * If still active then calculate up to now: 291 * If still active then calculate up to now:
292 */ 292 */
293 if (active) 293 if (timer_stats_active)
294 time_stop = ktime_get(); 294 time_stop = ktime_get();
295 295
296 time = ktime_sub(time_stop, time_start); 296 time = ktime_sub(time_stop, time_start);
@@ -368,18 +368,18 @@ static ssize_t tstats_write(struct file *file, const char __user *buf,
368 mutex_lock(&show_mutex); 368 mutex_lock(&show_mutex);
369 switch (ctl[0]) { 369 switch (ctl[0]) {
370 case '0': 370 case '0':
371 if (active) { 371 if (timer_stats_active) {
372 active = 0; 372 timer_stats_active = 0;
373 time_stop = ktime_get(); 373 time_stop = ktime_get();
374 sync_access(); 374 sync_access();
375 } 375 }
376 break; 376 break;
377 case '1': 377 case '1':
378 if (!active) { 378 if (!timer_stats_active) {
379 reset_entries(); 379 reset_entries();
380 time_start = ktime_get(); 380 time_start = ktime_get();
381 smp_mb(); 381 smp_mb();
382 active = 1; 382 timer_stats_active = 1;
383 } 383 }
384 break; 384 break;
385 default: 385 default:
diff --git a/kernel/timer.c b/kernel/timer.c
index 54d3912f8ca..0b36b9e5cc8 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -380,6 +380,8 @@ static void timer_stats_account_timer(struct timer_list *timer)
380{ 380{
381 unsigned int flag = 0; 381 unsigned int flag = 0;
382 382
383 if (likely(!timer->start_site))
384 return;
383 if (unlikely(tbase_get_deferrable(timer->base))) 385 if (unlikely(tbase_get_deferrable(timer->base)))
384 flag |= TIMER_STATS_FLAG_DEFERRABLE; 386 flag |= TIMER_STATS_FLAG_DEFERRABLE;
385 387
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e766..019f380fd76 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
226 the timings of the initcalls and traces key events and the identity 226 the timings of the initcalls and traces key events and the identity
227 of tasks that can cause boot delays, such as context-switches. 227 of tasks that can cause boot delays, such as context-switches.
228 228
229 Its aim is to be parsed by the /scripts/bootgraph.pl tool to 229 Its aim is to be parsed by the scripts/bootgraph.pl tool to
230 produce pretty graphics about boot inefficiencies, giving a visual 230 produce pretty graphics about boot inefficiencies, giving a visual
231 representation of the delays during initcalls - but the raw 231 representation of the delays during initcalls - but the raw
232 /debug/tracing/trace text output is readable too. 232 /debug/tracing/trace text output is readable too.
233 233
234 You must pass in ftrace=initcall to the kernel command line 234 You must pass in initcall_debug and ftrace=initcall to the kernel
235 to enable this on bootup. 235 command line to enable this on bootup.
236 236
237config TRACE_BRANCH_PROFILING 237config TRACE_BRANCH_PROFILING
238 bool 238 bool
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3718d55fb4c..bce9e01a29c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -291,7 +291,9 @@ function_stat_next(void *v, int idx)
291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); 291 pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
292 292
293 again: 293 again:
294 rec++; 294 if (idx != 0)
295 rec++;
296
295 if ((void *)rec >= (void *)&pg->records[pg->index]) { 297 if ((void *)rec >= (void *)&pg->records[pg->index]) {
296 pg = pg->next; 298 pg = pg->next;
297 if (!pg) 299 if (!pg)
@@ -1417,10 +1419,20 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
1417{ 1419{
1418 struct ftrace_iterator *iter = m->private; 1420 struct ftrace_iterator *iter = m->private;
1419 void *p = NULL; 1421 void *p = NULL;
1422 loff_t l;
1423
1424 if (!(iter->flags & FTRACE_ITER_HASH))
1425 *pos = 0;
1420 1426
1421 iter->flags |= FTRACE_ITER_HASH; 1427 iter->flags |= FTRACE_ITER_HASH;
1422 1428
1423 return t_hash_next(m, p, pos); 1429 iter->hidx = 0;
1430 for (l = 0; l <= *pos; ) {
1431 p = t_hash_next(m, p, &l);
1432 if (!p)
1433 break;
1434 }
1435 return p;
1424} 1436}
1425 1437
1426static int t_hash_show(struct seq_file *m, void *v) 1438static int t_hash_show(struct seq_file *m, void *v)
@@ -1467,8 +1479,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
1467 iter->pg = iter->pg->next; 1479 iter->pg = iter->pg->next;
1468 iter->idx = 0; 1480 iter->idx = 0;
1469 goto retry; 1481 goto retry;
1470 } else {
1471 iter->idx = -1;
1472 } 1482 }
1473 } else { 1483 } else {
1474 rec = &iter->pg->records[iter->idx++]; 1484 rec = &iter->pg->records[iter->idx++];
@@ -1497,6 +1507,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1497{ 1507{
1498 struct ftrace_iterator *iter = m->private; 1508 struct ftrace_iterator *iter = m->private;
1499 void *p = NULL; 1509 void *p = NULL;
1510 loff_t l;
1500 1511
1501 mutex_lock(&ftrace_lock); 1512 mutex_lock(&ftrace_lock);
1502 /* 1513 /*
@@ -1508,23 +1519,21 @@ static void *t_start(struct seq_file *m, loff_t *pos)
1508 if (*pos > 0) 1519 if (*pos > 0)
1509 return t_hash_start(m, pos); 1520 return t_hash_start(m, pos);
1510 iter->flags |= FTRACE_ITER_PRINTALL; 1521 iter->flags |= FTRACE_ITER_PRINTALL;
1511 (*pos)++;
1512 return iter; 1522 return iter;
1513 } 1523 }
1514 1524
1515 if (iter->flags & FTRACE_ITER_HASH) 1525 if (iter->flags & FTRACE_ITER_HASH)
1516 return t_hash_start(m, pos); 1526 return t_hash_start(m, pos);
1517 1527
1518 if (*pos > 0) { 1528 iter->pg = ftrace_pages_start;
1519 if (iter->idx < 0) 1529 iter->idx = 0;
1520 return p; 1530 for (l = 0; l <= *pos; ) {
1521 (*pos)--; 1531 p = t_next(m, p, &l);
1522 iter->idx--; 1532 if (!p)
1533 break;
1523 } 1534 }
1524 1535
1525 p = t_next(m, p, pos); 1536 if (!p && iter->flags & FTRACE_ITER_FILTER)
1526
1527 if (!p)
1528 return t_hash_start(m, pos); 1537 return t_hash_start(m, pos);
1529 1538
1530 return p; 1539 return p;
@@ -2500,32 +2509,31 @@ int ftrace_graph_count;
2500unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly; 2509unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
2501 2510
2502static void * 2511static void *
2503g_next(struct seq_file *m, void *v, loff_t *pos) 2512__g_next(struct seq_file *m, loff_t *pos)
2504{ 2513{
2505 unsigned long *array = m->private; 2514 unsigned long *array = m->private;
2506 int index = *pos;
2507 2515
2508 (*pos)++; 2516 if (*pos >= ftrace_graph_count)
2509
2510 if (index >= ftrace_graph_count)
2511 return NULL; 2517 return NULL;
2518 return &array[*pos];
2519}
2512 2520
2513 return &array[index]; 2521static void *
2522g_next(struct seq_file *m, void *v, loff_t *pos)
2523{
2524 (*pos)++;
2525 return __g_next(m, pos);
2514} 2526}
2515 2527
2516static void *g_start(struct seq_file *m, loff_t *pos) 2528static void *g_start(struct seq_file *m, loff_t *pos)
2517{ 2529{
2518 void *p = NULL;
2519
2520 mutex_lock(&graph_lock); 2530 mutex_lock(&graph_lock);
2521 2531
2522 /* Nothing, tell g_show to print all functions are enabled */ 2532 /* Nothing, tell g_show to print all functions are enabled */
2523 if (!ftrace_graph_count && !*pos) 2533 if (!ftrace_graph_count && !*pos)
2524 return (void *)1; 2534 return (void *)1;
2525 2535
2526 p = g_next(m, p, pos); 2536 return __g_next(m, pos);
2527
2528 return p;
2529} 2537}
2530 2538
2531static void g_stop(struct seq_file *m, void *p) 2539static void g_stop(struct seq_file *m, void *p)
@@ -3152,10 +3160,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
3152 3160
3153 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 3161 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
3154 3162
3155 if (ret || !write || (last_ftrace_enabled == ftrace_enabled)) 3163 if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
3156 goto out; 3164 goto out;
3157 3165
3158 last_ftrace_enabled = ftrace_enabled; 3166 last_ftrace_enabled = !!ftrace_enabled;
3159 3167
3160 if (ftrace_enabled) { 3168 if (ftrace_enabled) {
3161 3169
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 04dac263825..bf27bb7a63e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -1563,6 +1563,8 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
1563 return NULL; 1563 return NULL;
1564} 1564}
1565 1565
1566#ifdef CONFIG_TRACING
1567
1566#define TRACE_RECURSIVE_DEPTH 16 1568#define TRACE_RECURSIVE_DEPTH 16
1567 1569
1568static int trace_recursive_lock(void) 1570static int trace_recursive_lock(void)
@@ -1593,6 +1595,13 @@ static void trace_recursive_unlock(void)
1593 current->trace_recursion--; 1595 current->trace_recursion--;
1594} 1596}
1595 1597
1598#else
1599
1600#define trace_recursive_lock() (0)
1601#define trace_recursive_unlock() do { } while (0)
1602
1603#endif
1604
1596static DEFINE_PER_CPU(int, rb_need_resched); 1605static DEFINE_PER_CPU(int, rb_need_resched);
1597 1606
1598/** 1607/**
@@ -3104,6 +3113,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
3104} 3113}
3105EXPORT_SYMBOL_GPL(ring_buffer_read_page); 3114EXPORT_SYMBOL_GPL(ring_buffer_read_page);
3106 3115
3116#ifdef CONFIG_TRACING
3107static ssize_t 3117static ssize_t
3108rb_simple_read(struct file *filp, char __user *ubuf, 3118rb_simple_read(struct file *filp, char __user *ubuf,
3109 size_t cnt, loff_t *ppos) 3119 size_t cnt, loff_t *ppos)
@@ -3171,6 +3181,7 @@ static __init int rb_init_debugfs(void)
3171} 3181}
3172 3182
3173fs_initcall(rb_init_debugfs); 3183fs_initcall(rb_init_debugfs);
3184#endif
3174 3185
3175#ifdef CONFIG_HOTPLUG_CPU 3186#ifdef CONFIG_HOTPLUG_CPU
3176static int rb_cpu_notify(struct notifier_block *self, 3187static int rb_cpu_notify(struct notifier_block *self,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 076fa6f0ee4..3aa0a0dfdfa 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -284,13 +284,12 @@ void trace_wake_up(void)
284static int __init set_buf_size(char *str) 284static int __init set_buf_size(char *str)
285{ 285{
286 unsigned long buf_size; 286 unsigned long buf_size;
287 int ret;
288 287
289 if (!str) 288 if (!str)
290 return 0; 289 return 0;
291 ret = strict_strtoul(str, 0, &buf_size); 290 buf_size = memparse(str, &str);
292 /* nr_entries can not be zero */ 291 /* nr_entries can not be zero */
293 if (ret < 0 || buf_size == 0) 292 if (buf_size == 0)
294 return 0; 293 return 0;
295 trace_buf_size = buf_size; 294 trace_buf_size = buf_size;
296 return 1; 295 return 1;
@@ -2053,25 +2052,23 @@ static int tracing_open(struct inode *inode, struct file *file)
2053static void * 2052static void *
2054t_next(struct seq_file *m, void *v, loff_t *pos) 2053t_next(struct seq_file *m, void *v, loff_t *pos)
2055{ 2054{
2056 struct tracer *t = m->private; 2055 struct tracer *t = v;
2057 2056
2058 (*pos)++; 2057 (*pos)++;
2059 2058
2060 if (t) 2059 if (t)
2061 t = t->next; 2060 t = t->next;
2062 2061
2063 m->private = t;
2064
2065 return t; 2062 return t;
2066} 2063}
2067 2064
2068static void *t_start(struct seq_file *m, loff_t *pos) 2065static void *t_start(struct seq_file *m, loff_t *pos)
2069{ 2066{
2070 struct tracer *t = m->private; 2067 struct tracer *t;
2071 loff_t l = 0; 2068 loff_t l = 0;
2072 2069
2073 mutex_lock(&trace_types_lock); 2070 mutex_lock(&trace_types_lock);
2074 for (; t && l < *pos; t = t_next(m, t, &l)) 2071 for (t = trace_types; t && l < *pos; t = t_next(m, t, &l))
2075 ; 2072 ;
2076 2073
2077 return t; 2074 return t;
@@ -2107,18 +2104,10 @@ static struct seq_operations show_traces_seq_ops = {
2107 2104
2108static int show_traces_open(struct inode *inode, struct file *file) 2105static int show_traces_open(struct inode *inode, struct file *file)
2109{ 2106{
2110 int ret;
2111
2112 if (tracing_disabled) 2107 if (tracing_disabled)
2113 return -ENODEV; 2108 return -ENODEV;
2114 2109
2115 ret = seq_open(file, &show_traces_seq_ops); 2110 return seq_open(file, &show_traces_seq_ops);
2116 if (!ret) {
2117 struct seq_file *m = file->private_data;
2118 m->private = trace_types;
2119 }
2120
2121 return ret;
2122} 2111}
2123 2112
2124static ssize_t 2113static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6e735d4771f..3548ae5cc78 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -597,6 +597,7 @@ print_graph_function(struct trace_iterator *iter)
597 597
598extern struct pid *ftrace_pid_trace; 598extern struct pid *ftrace_pid_trace;
599 599
600#ifdef CONFIG_FUNCTION_TRACER
600static inline int ftrace_trace_task(struct task_struct *task) 601static inline int ftrace_trace_task(struct task_struct *task)
601{ 602{
602 if (!ftrace_pid_trace) 603 if (!ftrace_pid_trace)
@@ -604,6 +605,12 @@ static inline int ftrace_trace_task(struct task_struct *task)
604 605
605 return test_tsk_trace_trace(task); 606 return test_tsk_trace_trace(task);
606} 607}
608#else
609static inline int ftrace_trace_task(struct task_struct *task)
610{
611 return 1;
612}
613#endif
607 614
608/* 615/*
609 * trace_iterator_flags is an enumeration that defines bit 616 * trace_iterator_flags is an enumeration that defines bit
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134..6db005e1248 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
26 ftrace_graph_ret_entry, ignore, 26 ftrace_graph_ret_entry, ignore,
27 TRACE_STRUCT( 27 TRACE_STRUCT(
28 TRACE_FIELD(unsigned long, ret.func, func) 28 TRACE_FIELD(unsigned long, ret.func, func)
29 TRACE_FIELD(unsigned long long, ret.calltime, calltime)
30 TRACE_FIELD(unsigned long long, ret.rettime, rettime)
31 TRACE_FIELD(unsigned long, ret.overrun, overrun)
29 TRACE_FIELD(int, ret.depth, depth) 32 TRACE_FIELD(int, ret.depth, depth)
30 ), 33 ),
31 TP_RAW_FMT("<-- %lx (%d)") 34 TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index aa08be69a1b..53c8fd376a8 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -300,10 +300,18 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
300 300
301static void *t_start(struct seq_file *m, loff_t *pos) 301static void *t_start(struct seq_file *m, loff_t *pos)
302{ 302{
303 struct ftrace_event_call *call = NULL;
304 loff_t l;
305
303 mutex_lock(&event_mutex); 306 mutex_lock(&event_mutex);
304 if (*pos == 0) 307
305 m->private = ftrace_events.next; 308 m->private = ftrace_events.next;
306 return t_next(m, NULL, pos); 309 for (l = 0; l <= *pos; ) {
310 call = t_next(m, NULL, &l);
311 if (!call)
312 break;
313 }
314 return call;
307} 315}
308 316
309static void * 317static void *
@@ -332,10 +340,18 @@ s_next(struct seq_file *m, void *v, loff_t *pos)
332 340
333static void *s_start(struct seq_file *m, loff_t *pos) 341static void *s_start(struct seq_file *m, loff_t *pos)
334{ 342{
343 struct ftrace_event_call *call = NULL;
344 loff_t l;
345
335 mutex_lock(&event_mutex); 346 mutex_lock(&event_mutex);
336 if (*pos == 0) 347
337 m->private = ftrace_events.next; 348 m->private = ftrace_events.next;
338 return s_next(m, NULL, pos); 349 for (l = 0; l <= *pos; ) {
350 call = s_next(m, NULL, &l);
351 if (!call)
352 break;
353 }
354 return call;
339} 355}
340 356
341static int t_show(struct seq_file *m, void *v) 357static int t_show(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 90f13476483..7402144bff2 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -302,8 +302,7 @@ ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip,
302 if (count == -1) 302 if (count == -1)
303 seq_printf(m, ":unlimited\n"); 303 seq_printf(m, ":unlimited\n");
304 else 304 else
305 seq_printf(m, ":count=%ld", count); 305 seq_printf(m, ":count=%ld\n", count);
306 seq_putc(m, '\n');
307 306
308 return 0; 307 return 0;
309} 308}
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e..e0c2545622e 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
27{ 27{
28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; 28 int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
29 29
30 s->buffer[len] = 0; 30 seq_write(m, s->buffer, len);
31 seq_puts(m, s->buffer);
32 31
33 trace_seq_init(s); 32 trace_seq_init(s);
34} 33}
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 9bece9687b6..7b627811082 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -155,25 +155,19 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
155EXPORT_SYMBOL_GPL(__ftrace_vprintk); 155EXPORT_SYMBOL_GPL(__ftrace_vprintk);
156 156
157static void * 157static void *
158t_next(struct seq_file *m, void *v, loff_t *pos) 158t_start(struct seq_file *m, loff_t *pos)
159{ 159{
160 const char **fmt = m->private; 160 const char **fmt = __start___trace_bprintk_fmt + *pos;
161 const char **next = fmt;
162
163 (*pos)++;
164 161
165 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) 162 if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt)
166 return NULL; 163 return NULL;
167
168 next = fmt;
169 m->private = ++next;
170
171 return fmt; 164 return fmt;
172} 165}
173 166
174static void *t_start(struct seq_file *m, loff_t *pos) 167static void *t_next(struct seq_file *m, void * v, loff_t *pos)
175{ 168{
176 return t_next(m, NULL, pos); 169 (*pos)++;
170 return t_start(m, pos);
177} 171}
178 172
179static int t_show(struct seq_file *m, void *v) 173static int t_show(struct seq_file *m, void *v)
@@ -224,15 +218,7 @@ static const struct seq_operations show_format_seq_ops = {
224static int 218static int
225ftrace_formats_open(struct inode *inode, struct file *file) 219ftrace_formats_open(struct inode *inode, struct file *file)
226{ 220{
227 int ret; 221 return seq_open(file, &show_format_seq_ops);
228
229 ret = seq_open(file, &show_format_seq_ops);
230 if (!ret) {
231 struct seq_file *m = file->private_data;
232
233 m->private = __start___trace_bprintk_fmt;
234 }
235 return ret;
236} 222}
237 223
238static const struct file_operations ftrace_formats_fops = { 224static const struct file_operations ftrace_formats_fops = {
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71db..e644af91012 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos); 326 ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
327 327
328 if (ret || !write || 328 if (ret || !write ||
329 (last_stack_tracer_enabled == stack_tracer_enabled)) 329 (last_stack_tracer_enabled == !!stack_tracer_enabled))
330 goto out; 330 goto out;
331 331
332 last_stack_tracer_enabled = stack_tracer_enabled; 332 last_stack_tracer_enabled = !!stack_tracer_enabled;
333 333
334 if (stack_tracer_enabled) 334 if (stack_tracer_enabled)
335 register_ftrace_function(&trace_ops); 335 register_ftrace_function(&trace_ops);
diff --git a/kernel/trace/trace_stat.c b/kernel/trace/trace_stat.c
index c00643733f4..e66f5e49334 100644
--- a/kernel/trace/trace_stat.c
+++ b/kernel/trace/trace_stat.c
@@ -199,17 +199,13 @@ static void *stat_seq_start(struct seq_file *s, loff_t *pos)
199 mutex_lock(&session->stat_mutex); 199 mutex_lock(&session->stat_mutex);
200 200
201 /* If we are in the beginning of the file, print the headers */ 201 /* If we are in the beginning of the file, print the headers */
202 if (!*pos && session->ts->stat_headers) { 202 if (!*pos && session->ts->stat_headers)
203 (*pos)++;
204 return SEQ_START_TOKEN; 203 return SEQ_START_TOKEN;
205 }
206 204
207 node = rb_first(&session->stat_root); 205 node = rb_first(&session->stat_root);
208 for (i = 0; node && i < *pos; i++) 206 for (i = 0; node && i < *pos; i++)
209 node = rb_next(node); 207 node = rb_next(node);
210 208
211 (*pos)++;
212
213 return node; 209 return node;
214} 210}
215 211