author     James Morris <jmorris@namei.org>  2009-07-13 10:30:40 -0400
committer  James Morris <jmorris@namei.org>  2009-07-13 10:30:40 -0400
commit     7d45ecafb6792ca68da9517969d37d910601845f (patch)
tree       a98b1074e5577e66a97963745f975404d0aac266 /kernel
parent     be940d6279c30a2d7c4e8d1d5435f957f594d66d (diff)
parent     7638d5322bd89d49e013a03fe2afaeb6d214fabd (diff)
Merge branch 'master' into next

Conflicts:
	include/linux/personality.h

Use Linus' version.

Signed-off-by: James Morris <jmorris@namei.org>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c                     |    6
-rw-r--r--  kernel/exit.c                     |    1
-rw-r--r--  kernel/fork.c                     |    1
-rw-r--r--  kernel/kmod.c                     |    1
-rw-r--r--  kernel/kprobes.c                  |    6
-rw-r--r--  kernel/module.c                   |    6
-rw-r--r--  kernel/perf_counter.c             |  322
-rw-r--r--  kernel/power/user.c               |    1
-rw-r--r--  kernel/ptrace.c                   |    4
-rw-r--r--  kernel/rcutree.c                  |    3
-rw-r--r--  kernel/resource.c                 |    2
-rw-r--r--  kernel/sched.c                    |   14
-rw-r--r--  kernel/trace/Kconfig              |    6
-rw-r--r--  kernel/trace/blktrace.c           |    1
-rw-r--r--  kernel/trace/ftrace.c             |    4
-rw-r--r--  kernel/trace/trace.c              |    1
-rw-r--r--  kernel/trace/trace_event_types.h  |    3
-rw-r--r--  kernel/trace/trace_output.c       |    3
-rw-r--r--  kernel/trace/trace_stack.c        |    4
19 files changed, 306 insertions(+), 83 deletions(-)
diff --git a/kernel/acct.c b/kernel/acct.c
index 7afa31564162..9f3391090b3e 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -215,6 +215,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
 static int acct_on(char *name)
 {
 	struct file *file;
+	struct vfsmount *mnt;
 	int error;
 	struct pid_namespace *ns;
 	struct bsd_acct_struct *acct = NULL;
@@ -256,11 +257,12 @@ static int acct_on(char *name)
 		acct = NULL;
 	}
 
-	mnt_pin(file->f_path.mnt);
+	mnt = file->f_path.mnt;
+	mnt_pin(mnt);
 	acct_file_reopen(ns->bacct, file, ns);
 	spin_unlock(&acct_lock);
 
-	mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
+	mntput(mnt); /* it's pinned, now give up active reference */
 	kfree(acct);
 
 	return 0;
diff --git a/kernel/exit.c b/kernel/exit.c
index 628d41f0dd54..869dc221733e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -12,7 +12,6 @@
 #include <linux/completion.h>
 #include <linux/personality.h>
 #include <linux/tty.h>
-#include <linux/mnt_namespace.h>
 #include <linux/iocontext.h>
 #include <linux/key.h>
 #include <linux/security.h>
diff --git a/kernel/fork.c b/kernel/fork.c
index 467746b3f0aa..bd2959228871 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/completion.h>
-#include <linux/mnt_namespace.h>
 #include <linux/personality.h>
 #include <linux/mempolicy.h>
 #include <linux/sem.h>
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 7e95bedb2bfc..385c31a1bdbf 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -24,7 +24,6 @@
 #include <linux/unistd.h>
 #include <linux/kmod.h>
 #include <linux/slab.h>
-#include <linux/mnt_namespace.h>
 #include <linux/completion.h>
 #include <linux/file.h>
 #include <linux/fdtable.h>
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index c0fa54b276d9..16b5739c516a 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -237,13 +237,9 @@ static int __kprobes collect_garbage_slots(void)
 {
 	struct kprobe_insn_page *kip;
 	struct hlist_node *pos, *next;
-	int safety;
 
 	/* Ensure no-one is preempted on the garbages */
-	mutex_unlock(&kprobe_insn_mutex);
-	safety = check_safety();
-	mutex_lock(&kprobe_insn_mutex);
-	if (safety != 0)
+	if (check_safety())
 		return -EAGAIN;
 
 	hlist_for_each_entry_safe(kip, pos, next, &kprobe_insn_pages, hlist) {
diff --git a/kernel/module.c b/kernel/module.c
index 38928fcaff2b..0a049837008e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2451,9 +2451,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 		return ret;
 	}
 	if (ret > 0) {
-		printk(KERN_WARNING "%s: '%s'->init suspiciously returned %d, "
-				    "it should follow 0/-E convention\n"
-		       KERN_WARNING "%s: loading module anyway...\n",
+		printk(KERN_WARNING
+"%s: '%s'->init suspiciously returned %d, it should follow 0/-E convention\n"
+"%s: loading module anyway...\n",
 		       __func__, mod->name, ret,
 		       __func__);
 		dump_stack();
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 1a933a221ea4..a641eb753b8c 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -236,6 +236,8 @@ list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 
 	list_add_rcu(&counter->event_entry, &ctx->event_list);
 	ctx->nr_counters++;
+	if (counter->attr.inherit_stat)
+		ctx->nr_stat++;
 }
 
 /*
@@ -250,6 +252,8 @@ list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 	if (list_empty(&counter->list_entry))
 		return;
 	ctx->nr_counters--;
+	if (counter->attr.inherit_stat)
+		ctx->nr_stat--;
 
 	list_del_init(&counter->list_entry);
 	list_del_rcu(&counter->event_entry);
@@ -1006,6 +1010,81 @@ static int context_equiv(struct perf_counter_context *ctx1,
 		&& !ctx1->pin_count && !ctx2->pin_count;
 }
 
+static void __perf_counter_read(void *counter);
+
+static void __perf_counter_sync_stat(struct perf_counter *counter,
+				     struct perf_counter *next_counter)
+{
+	u64 value;
+
+	if (!counter->attr.inherit_stat)
+		return;
+
+	/*
+	 * Update the counter value, we cannot use perf_counter_read()
+	 * because we're in the middle of a context switch and have IRQs
+	 * disabled, which upsets smp_call_function_single(), however
+	 * we know the counter must be on the current CPU, therefore we
+	 * don't need to use it.
+	 */
+	switch (counter->state) {
+	case PERF_COUNTER_STATE_ACTIVE:
+		__perf_counter_read(counter);
+		break;
+
+	case PERF_COUNTER_STATE_INACTIVE:
+		update_counter_times(counter);
+		break;
+
+	default:
+		break;
+	}
+
+	/*
+	 * In order to keep per-task stats reliable we need to flip the counter
+	 * values when we flip the contexts.
+	 */
+	value = atomic64_read(&next_counter->count);
+	value = atomic64_xchg(&counter->count, value);
+	atomic64_set(&next_counter->count, value);
+
+	swap(counter->total_time_enabled, next_counter->total_time_enabled);
+	swap(counter->total_time_running, next_counter->total_time_running);
+
+	/*
+	 * Since we swizzled the values, update the user visible data too.
+	 */
+	perf_counter_update_userpage(counter);
+	perf_counter_update_userpage(next_counter);
+}
+
+#define list_next_entry(pos, member) \
+	list_entry(pos->member.next, typeof(*pos), member)
+
+static void perf_counter_sync_stat(struct perf_counter_context *ctx,
+				   struct perf_counter_context *next_ctx)
+{
+	struct perf_counter *counter, *next_counter;
+
+	if (!ctx->nr_stat)
+		return;
+
+	counter = list_first_entry(&ctx->event_list,
+				   struct perf_counter, event_entry);
+
+	next_counter = list_first_entry(&next_ctx->event_list,
+					struct perf_counter, event_entry);
+
+	while (&counter->event_entry != &ctx->event_list &&
+	       &next_counter->event_entry != &next_ctx->event_list) {
+
+		__perf_counter_sync_stat(counter, next_counter);
+
+		counter = list_next_entry(counter, event_entry);
+		next_counter = list_next_entry(next_counter, event_entry);
+	}
+}
+
 /*
  * Called from scheduler to remove the counters of the current task,
  * with interrupts disabled.
@@ -1061,6 +1140,8 @@ void perf_counter_task_sched_out(struct task_struct *task,
 		ctx->task = next;
 		next_ctx->task = task;
 		do_switch = 0;
+
+		perf_counter_sync_stat(ctx, next_ctx);
 	}
 	spin_unlock(&next_ctx->lock);
 	spin_unlock(&ctx->lock);
@@ -1348,9 +1429,56 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 }
 
 /*
+ * Enable all of a task's counters that have been marked enable-on-exec.
+ * This expects task == current.
+ */
+static void perf_counter_enable_on_exec(struct task_struct *task)
+{
+	struct perf_counter_context *ctx;
+	struct perf_counter *counter;
+	unsigned long flags;
+	int enabled = 0;
+
+	local_irq_save(flags);
+	ctx = task->perf_counter_ctxp;
+	if (!ctx || !ctx->nr_counters)
+		goto out;
+
+	__perf_counter_task_sched_out(ctx);
+
+	spin_lock(&ctx->lock);
+
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (!counter->attr.enable_on_exec)
+			continue;
+		counter->attr.enable_on_exec = 0;
+		if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
+			continue;
+		counter->state = PERF_COUNTER_STATE_INACTIVE;
+		counter->tstamp_enabled =
+			ctx->time - counter->total_time_enabled;
+		enabled = 1;
+	}
+
+	/*
+	 * Unclone this context if we enabled any counter.
+	 */
+	if (enabled && ctx->parent_ctx) {
+		put_ctx(ctx->parent_ctx);
+		ctx->parent_ctx = NULL;
+	}
+
+	spin_unlock(&ctx->lock);
+
+	perf_counter_task_sched_in(task, smp_processor_id());
+ out:
+	local_irq_restore(flags);
+}
+
+/*
  * Cross CPU call to read the hardware counter
  */
-static void __read(void *info)
+static void __perf_counter_read(void *info)
 {
 	struct perf_counter *counter = info;
 	struct perf_counter_context *ctx = counter->ctx;
@@ -1372,7 +1500,7 @@ static u64 perf_counter_read(struct perf_counter *counter)
 	 */
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
 		smp_call_function_single(counter->oncpu,
-					 __read, counter, 1);
+					 __perf_counter_read, counter, 1);
 	} else if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
 		update_counter_times(counter);
 	}
@@ -1508,11 +1636,13 @@ static void free_counter(struct perf_counter *counter)
 {
 	perf_pending_sync(counter);
 
-	atomic_dec(&nr_counters);
-	if (counter->attr.mmap)
-		atomic_dec(&nr_mmap_counters);
-	if (counter->attr.comm)
-		atomic_dec(&nr_comm_counters);
+	if (!counter->parent) {
+		atomic_dec(&nr_counters);
+		if (counter->attr.mmap)
+			atomic_dec(&nr_mmap_counters);
+		if (counter->attr.comm)
+			atomic_dec(&nr_comm_counters);
+	}
 
 	if (counter->destroy)
 		counter->destroy(counter);
@@ -1751,6 +1881,14 @@ int perf_counter_task_disable(void)
 	return 0;
 }
 
+static int perf_counter_index(struct perf_counter *counter)
+{
+	if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return 0;
+
+	return counter->hw.idx + 1 - PERF_COUNTER_INDEX_OFFSET;
+}
+
 /*
  * Callers need to ensure there can be no nesting of this function, otherwise
  * the seqlock logic goes bad. We can not serialize this because the arch
@@ -1775,11 +1913,17 @@ void perf_counter_update_userpage(struct perf_counter *counter)
 	preempt_disable();
 	++userpg->lock;
 	barrier();
-	userpg->index = counter->hw.idx;
+	userpg->index = perf_counter_index(counter);
 	userpg->offset = atomic64_read(&counter->count);
 	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
 		userpg->offset -= atomic64_read(&counter->hw.prev_count);
 
+	userpg->time_enabled = counter->total_time_enabled +
+			atomic64_read(&counter->child_total_time_enabled);
+
+	userpg->time_running = counter->total_time_running +
+			atomic64_read(&counter->child_total_time_running);
+
 	barrier();
 	++userpg->lock;
 	preempt_enable();
@@ -1876,7 +2020,7 @@ fail:
 
 static void perf_mmap_free_page(unsigned long addr)
 {
-	struct page *page = virt_to_page(addr);
+	struct page *page = virt_to_page((void *)addr);
 
 	page->mapping = NULL;
 	__free_page(page);
@@ -2483,15 +2627,14 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		u32 cpu, reserved;
 	} cpu_entry;
 
-	header.type = 0;
+	header.type = PERF_EVENT_SAMPLE;
 	header.size = sizeof(header);
 
-	header.misc = PERF_EVENT_MISC_OVERFLOW;
+	header.misc = 0;
 	header.misc |= perf_misc_flags(data->regs);
 
 	if (sample_type & PERF_SAMPLE_IP) {
 		ip = perf_instruction_pointer(data->regs);
-		header.type |= PERF_SAMPLE_IP;
 		header.size += sizeof(ip);
 	}
 
@@ -2500,7 +2643,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		tid_entry.pid = perf_counter_pid(counter, current);
 		tid_entry.tid = perf_counter_tid(counter, current);
 
-		header.type |= PERF_SAMPLE_TID;
 		header.size += sizeof(tid_entry);
 	}
 
@@ -2510,34 +2652,25 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		 */
 		time = sched_clock();
 
-		header.type |= PERF_SAMPLE_TIME;
 		header.size += sizeof(u64);
 	}
 
-	if (sample_type & PERF_SAMPLE_ADDR) {
-		header.type |= PERF_SAMPLE_ADDR;
+	if (sample_type & PERF_SAMPLE_ADDR)
 		header.size += sizeof(u64);
-	}
 
-	if (sample_type & PERF_SAMPLE_ID) {
-		header.type |= PERF_SAMPLE_ID;
+	if (sample_type & PERF_SAMPLE_ID)
 		header.size += sizeof(u64);
-	}
 
 	if (sample_type & PERF_SAMPLE_CPU) {
-		header.type |= PERF_SAMPLE_CPU;
 		header.size += sizeof(cpu_entry);
 
 		cpu_entry.cpu = raw_smp_processor_id();
 	}
 
-	if (sample_type & PERF_SAMPLE_PERIOD) {
-		header.type |= PERF_SAMPLE_PERIOD;
+	if (sample_type & PERF_SAMPLE_PERIOD)
 		header.size += sizeof(u64);
-	}
 
 	if (sample_type & PERF_SAMPLE_GROUP) {
-		header.type |= PERF_SAMPLE_GROUP;
 		header.size += sizeof(u64) +
 			counter->nr_siblings * sizeof(group_entry);
 	}
@@ -2547,10 +2680,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 
 		if (callchain) {
 			callchain_size = (1 + callchain->nr) * sizeof(u64);
-
-			header.type |= PERF_SAMPLE_CALLCHAIN;
 			header.size += callchain_size;
-		}
+		} else
+			header.size += sizeof(u64);
 	}
 
 	ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2601,13 +2733,79 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
 		}
 	}
 
-	if (callchain)
-		perf_output_copy(&handle, callchain, callchain_size);
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		if (callchain)
+			perf_output_copy(&handle, callchain, callchain_size);
+		else {
+			u64 nr = 0;
+			perf_output_put(&handle, nr);
+		}
+	}
 
 	perf_output_end(&handle);
 }
 
 /*
+ * read event
+ */
+
+struct perf_read_event {
+	struct perf_event_header	header;
+
+	u32				pid;
+	u32				tid;
+	u64				value;
+	u64				format[3];
+};
+
+static void
+perf_counter_read_event(struct perf_counter *counter,
+			struct task_struct *task)
+{
+	struct perf_output_handle handle;
+	struct perf_read_event event = {
+		.header = {
+			.type = PERF_EVENT_READ,
+			.misc = 0,
+			.size = sizeof(event) - sizeof(event.format),
+		},
+		.pid = perf_counter_pid(counter, task),
+		.tid = perf_counter_tid(counter, task),
+		.value = atomic64_read(&counter->count),
+	};
+	int ret, i = 0;
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+		event.header.size += sizeof(u64);
+		event.format[i++] = counter->total_time_enabled;
+	}
+
+	if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+		event.header.size += sizeof(u64);
+		event.format[i++] = counter->total_time_running;
+	}
+
+	if (counter->attr.read_format & PERF_FORMAT_ID) {
+		u64 id;
+
+		event.header.size += sizeof(u64);
+		if (counter->parent)
+			id = counter->parent->id;
+		else
+			id = counter->id;
+
+		event.format[i++] = id;
+	}
+
+	ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
+	if (ret)
+		return;
+
+	perf_output_copy(&handle, &event, event.header.size);
+	perf_output_end(&handle);
+}
+
+/*
  * fork tracking
  */
 
@@ -2798,6 +2996,9 @@ void perf_counter_comm(struct task_struct *task)
 {
 	struct perf_comm_event comm_event;
 
+	if (task->perf_counter_ctxp)
+		perf_counter_enable_on_exec(task);
+
 	if (!atomic_read(&nr_comm_counters))
 		return;
 
@@ -3317,8 +3518,8 @@ out:
 	put_cpu_var(perf_cpu_context);
 }
 
-void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
+void __perf_swcounter_event(u32 event, u64 nr, int nmi,
+			    struct pt_regs *regs, u64 addr)
 {
 	struct perf_sample_data data = {
 		.regs = regs,
@@ -3509,9 +3710,21 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 }
 #endif
 
+atomic_t perf_swcounter_enabled[PERF_COUNT_SW_MAX];
+
+static void sw_perf_counter_destroy(struct perf_counter *counter)
+{
+	u64 event = counter->attr.config;
+
+	WARN_ON(counter->parent);
+
+	atomic_dec(&perf_swcounter_enabled[event]);
+}
+
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
 	const struct pmu *pmu = NULL;
+	u64 event = counter->attr.config;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -3520,7 +3733,7 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	 * to be kernel events, and page faults are never hypervisor
 	 * events.
 	 */
-	switch (counter->attr.config) {
+	switch (event) {
 	case PERF_COUNT_SW_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
@@ -3541,6 +3754,10 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_SW_PAGE_FAULTS_MAJ:
 	case PERF_COUNT_SW_CONTEXT_SWITCHES:
 	case PERF_COUNT_SW_CPU_MIGRATIONS:
+		if (!counter->parent) {
+			atomic_inc(&perf_swcounter_enabled[event]);
+			counter->destroy = sw_perf_counter_destroy;
+		}
 		pmu = &perf_ops_generic;
 		break;
 	}
@@ -3556,6 +3773,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 		   int cpu,
 		   struct perf_counter_context *ctx,
 		   struct perf_counter *group_leader,
+		   struct perf_counter *parent_counter,
 		   gfp_t gfpflags)
 {
 	const struct pmu *pmu;
@@ -3591,6 +3809,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	counter->ctx = ctx;
 	counter->oncpu = -1;
 
+	counter->parent = parent_counter;
+
 	counter->ns = get_pid_ns(current->nsproxy->pid_ns);
 	counter->id = atomic64_inc_return(&perf_counter_id);
 
@@ -3648,11 +3868,13 @@ done:
 
 	counter->pmu = pmu;
 
-	atomic_inc(&nr_counters);
-	if (counter->attr.mmap)
-		atomic_inc(&nr_mmap_counters);
-	if (counter->attr.comm)
-		atomic_inc(&nr_comm_counters);
+	if (!counter->parent) {
+		atomic_inc(&nr_counters);
+		if (counter->attr.mmap)
+			atomic_inc(&nr_mmap_counters);
+		if (counter->attr.comm)
+			atomic_inc(&nr_comm_counters);
+	}
 
 	return counter;
 }
@@ -3815,7 +4037,7 @@ SYSCALL_DEFINE5(perf_counter_open,
 	}
 
 	counter = perf_counter_alloc(&attr, cpu, ctx, group_leader,
-				     GFP_KERNEL);
+				     NULL, GFP_KERNEL);
 	ret = PTR_ERR(counter);
 	if (IS_ERR(counter))
 		goto err_put_context;
@@ -3881,7 +4103,8 @@ inherit_counter(struct perf_counter *parent_counter,
 
 	child_counter = perf_counter_alloc(&parent_counter->attr,
 					   parent_counter->cpu, child_ctx,
-					   group_leader, GFP_KERNEL);
+					   group_leader, parent_counter,
+					   GFP_KERNEL);
 	if (IS_ERR(child_counter))
 		return child_counter;
 	get_ctx(child_ctx);
@@ -3904,12 +4127,6 @@ inherit_counter(struct perf_counter *parent_counter,
 	 */
 	add_counter_to_ctx(child_counter, child_ctx);
 
-	child_counter->parent = parent_counter;
-	/*
-	 * inherit into child's child as well:
-	 */
-	child_counter->attr.inherit = 1;
-
 	/*
 	 * Get a reference to the parent filp - we will fput it
 	 * when the child counter exits. This is safe to do because
@@ -3953,10 +4170,14 @@ static int inherit_group(struct perf_counter *parent_counter,
 }
 
 static void sync_child_counter(struct perf_counter *child_counter,
-			       struct perf_counter *parent_counter)
+			       struct task_struct *child)
 {
+	struct perf_counter *parent_counter = child_counter->parent;
 	u64 child_val;
 
+	if (child_counter->attr.inherit_stat)
+		perf_counter_read_event(child_counter, child);
+
 	child_val = atomic64_read(&child_counter->count);
 
 	/*
@@ -3985,7 +4206,8 @@ static void sync_child_counter(struct perf_counter *child_counter,
 
 static void
 __perf_counter_exit_task(struct perf_counter *child_counter,
-			 struct perf_counter_context *child_ctx)
+			 struct perf_counter_context *child_ctx,
+			 struct task_struct *child)
 {
 	struct perf_counter *parent_counter;
 
@@ -3999,7 +4221,7 @@ __perf_counter_exit_task(struct perf_counter *child_counter,
 	 * counters need to be zapped - but otherwise linger.
 	 */
 	if (parent_counter) {
-		sync_child_counter(child_counter, parent_counter);
+		sync_child_counter(child_counter, child);
 		free_counter(child_counter);
 	}
 }
@@ -4061,7 +4283,7 @@ void perf_counter_exit_task(struct task_struct *child)
 again:
 	list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
 				 list_entry)
-		__perf_counter_exit_task(child_counter, child_ctx, child);
+		__perf_counter_exit_task(child_counter, child_ctx, child);
 
 	/*
 	 * If the last counter was a group counter, it will have appended all
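
For context on the userpg->lock protocol that the perf_counter_update_userpage() hunk extends: the kernel increments the lock word before and after publishing new values, so the count is odd while an update is in flight. A user-space reader of the mmap'ed page can therefore take a consistent snapshot with the usual seqcount retry loop. The following is a minimal sketch, not code from the patch; it assumes pg points at the counter's mapped control page and that rmb() expands to a suitable read barrier for the architecture. The field names (lock, offset, time_enabled, time_running) come from the hunk above.

struct counter_snapshot {
	__u64 offset;
	__u64 time_enabled;
	__u64 time_running;
};

static struct counter_snapshot
read_counter_page(volatile struct perf_counter_mmap_page *pg)
{
	struct counter_snapshot snap;
	__u32 seq;

	do {
		seq = pg->lock;		/* kernel bumps this around updates */
		rmb();			/* read the fields only after sampling seq */
		snap.offset       = pg->offset;
		snap.time_enabled = pg->time_enabled;
		snap.time_running = pg->time_running;
		rmb();			/* re-check seq only after the reads */
	} while (pg->lock != seq || (seq & 1));	/* retry if an update raced us */

	return snap;
}
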
diff --git a/kernel/power/user.c b/kernel/power/user.c
index ed97375daae9..bf0014d6a5f0 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -23,7 +23,6 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
-#include <linux/smp_lock.h>
 #include <scsi/scsi_scan.h>
 
 #include <asm/uaccess.h>
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 9a4184e04f29..307c285af59e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -181,8 +181,8 @@ int ptrace_attach(struct task_struct *task)
 	 * interference; SUID, SGID and LSM creds get determined differently
 	 * under ptrace.
 	 */
-	retval = mutex_lock_interruptible(&task->cred_guard_mutex);
-	if (retval < 0)
+	retval = -ERESTARTNOINTR;
+	if (mutex_lock_interruptible(&task->cred_guard_mutex))
 		goto out;
 
 	task_lock(task);
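
A side note on the error-code change above: mutex_lock_interruptible() returns -EINTR when a signal arrives, but ptrace_attach() now reports -ERESTARTNOINTR instead, which tells the signal-delivery code to restart the syscall transparently after the handler runs rather than leaking a spurious EINTR to userspace. A hedged sketch of the pattern (example_attach and guard are illustrative names, not from this patch):

static int example_attach(struct mutex *guard)
{
	int retval = -ERESTARTNOINTR;	/* restart the syscall after the signal */

	if (mutex_lock_interruptible(guard))
		return retval;		/* interrupted while sleeping on the lock */

	/* ... work done under the mutex ... */

	mutex_unlock(guard);
	return 0;
}
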
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 0dccfbba6d26..7717b95c2027 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1533,7 +1533,7 @@ void __init __rcu_init(void)
 	int j;
 	struct rcu_node *rnp;
 
-	printk(KERN_WARNING "Experimental hierarchical RCU implementation.\n");
+	printk(KERN_INFO "Hierarchical RCU implementation.\n");
 #ifdef CONFIG_RCU_CPU_STALL_DETECTOR
 	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
@@ -1546,7 +1546,6 @@ void __init __rcu_init(void)
 		rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long)i);
 	/* Register notifier for non-boot CPUs */
 	register_cpu_notifier(&rcu_nb);
-	printk(KERN_WARNING "Experimental hierarchical RCU init done.\n");
 }
 
 module_param(blimit, int, 0);
diff --git a/kernel/resource.c b/kernel/resource.c
index ac5f3a36923f..78b087221c15 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -787,7 +787,7 @@ static int __init reserve_setup(char *str)
 	static struct resource reserve[MAXRESERVE];
 
 	for (;;) {
-		int io_start, io_num;
+		unsigned int io_start, io_num;
 		int x = reserved;
 
 		if (get_option (&str, &io_start) != 2)
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..01f55ada3598 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6541,6 +6541,11 @@ SYSCALL_DEFINE0(sched_yield)
 	return 0;
 }
 
+static inline int should_resched(void)
+{
+	return need_resched() && !(preempt_count() & PREEMPT_ACTIVE);
+}
+
 static void __cond_resched(void)
 {
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -6560,8 +6565,7 @@ static void __cond_resched(void)
 
 int __sched _cond_resched(void)
 {
-	if (need_resched() && !(preempt_count() & PREEMPT_ACTIVE) &&
-			system_state == SYSTEM_RUNNING) {
+	if (should_resched()) {
 		__cond_resched();
 		return 1;
 	}
@@ -6579,12 +6583,12 @@ EXPORT_SYMBOL(_cond_resched);
  */
 int cond_resched_lock(spinlock_t *lock)
 {
-	int resched = need_resched() && system_state == SYSTEM_RUNNING;
+	int resched = should_resched();
 	int ret = 0;
 
 	if (spin_needbreak(lock) || resched) {
 		spin_unlock(lock);
-		if (resched && need_resched())
+		if (resched)
 			__cond_resched();
 		else
 			cpu_relax();
@@ -6599,7 +6603,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && system_state == SYSTEM_RUNNING) {
+	if (should_resched()) {
 		local_bh_enable();
 		__cond_resched();
 		local_bh_disable();
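
As a usage note for the should_resched()/cond_resched_lock() pair refactored above: the lock variant is meant for long scans that hold a spinlock; it drops the lock, lets a reschedule (or lock break) happen, retakes the lock, and returns nonzero when it actually released it. A minimal sketch under assumed names (my_lock, my_table, nr_entries and examine() are illustrative, not from this patch):

static void scan_table(void)
{
	int i;

	spin_lock(&my_lock);
	for (i = 0; i < nr_entries; i++) {
		examine(&my_table[i]);
		/*
		 * Give the scheduler a chance; the index-based iteration
		 * keeps the scan valid even if the lock was dropped and
		 * retaken here.
		 */
		cond_resched_lock(&my_lock);
	}
	spin_unlock(&my_lock);
}
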
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1551f47e7669..019f380fd764 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -226,13 +226,13 @@ config BOOT_TRACER
 	  the timings of the initcalls and traces key events and the identity
 	  of tasks that can cause boot delays, such as context-switches.
 
-	  Its aim is to be parsed by the /scripts/bootgraph.pl tool to
+	  Its aim is to be parsed by the scripts/bootgraph.pl tool to
 	  produce pretty graphics about boot inefficiencies, giving a visual
 	  representation of the delays during initcalls - but the raw
 	  /debug/tracing/trace text output is readable too.
 
-	  You must pass in ftrace=initcall to the kernel command line
-	  to enable this on bootup.
+	  You must pass in initcall_debug and ftrace=initcall to the kernel
+	  command line to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
 	bool
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 39af8af6fc30..1090b0aed9ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -22,6 +22,7 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
+#include <linux/smp_lock.h>
 #include <linux/time.h>
 #include <linux/uaccess.h>
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3716bf04df6..bce9e01a29c8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3160,10 +3160,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 	ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
-	if (ret || !write || (last_ftrace_enabled == ftrace_enabled))
+	if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled))
 		goto out;
 
-	last_ftrace_enabled = ftrace_enabled;
+	last_ftrace_enabled = !!ftrace_enabled;
 
 	if (ftrace_enabled) {
 
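
The !! double negation above matters because ftrace_enabled is a plain int sysctl: userspace may write any nonzero value, and without normalization a write of 2 while tracing is already on would compare unequal to the stored 1 and re-run the enable path. A small illustration of the idiom (the names here are hypothetical, not from this patch):

static int last_enabled = 1;		/* feature is currently on */

static void sysctl_changed(int new_value)
{
	if (last_enabled == !!new_value)	/* writes of 1, 2, 42 all mean "on" */
		return;				/* no real state transition */

	last_enabled = !!new_value;		/* store the canonical 0/1 */
	/* ... enable or disable the feature here ... */
}
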
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3aa0a0dfdfa8..8bc8d8afea6a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -17,6 +17,7 @@
 #include <linux/writeback.h>
 #include <linux/kallsyms.h>
 #include <linux/seq_file.h>
+#include <linux/smp_lock.h>
 #include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
diff --git a/kernel/trace/trace_event_types.h b/kernel/trace/trace_event_types.h
index 5e32e375134d..6db005e12487 100644
--- a/kernel/trace/trace_event_types.h
+++ b/kernel/trace/trace_event_types.h
@@ -26,6 +26,9 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET,
 			      ftrace_graph_ret_entry, ignore,
 	TRACE_STRUCT(
 		TRACE_FIELD(unsigned long, ret.func, func)
+		TRACE_FIELD(unsigned long long, ret.calltime, calltime)
+		TRACE_FIELD(unsigned long long, ret.rettime, rettime)
+		TRACE_FIELD(unsigned long, ret.overrun, overrun)
 		TRACE_FIELD(int, ret.depth, depth)
 	),
 	TP_RAW_FMT("<-- %lx (%d)")
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 7938f3ae93e3..e0c2545622e8 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -27,8 +27,7 @@ void trace_print_seq(struct seq_file *m, struct trace_seq *s)
 {
 	int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
 
-	s->buffer[len] = 0;
-	seq_puts(m, s->buffer);
+	seq_write(m, s->buffer, len);
 
 	trace_seq_init(s);
 }
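
The switch to seq_write() above sidesteps two problems with the old code: it no longer has to stomp a NUL terminator into the trace buffer (clobbering the final data byte when the seq was full), and it copies an explicit number of bytes, whereas seq_puts() stops at the first embedded NUL. A tiny illustration (buf is a made-up buffer; seq_puts() and seq_write() are the real seq_file APIs):

char buf[8] = { 'a', 'b', '\0', 'c', 'd', 'e', 'f', 'g' };

seq_puts(m, buf);	/* emits only "ab" - stops at the embedded NUL */
seq_write(m, buf, 8);	/* emits all 8 bytes, no terminator required */
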
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 2d7aebd71dbd..e644af910124 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -326,10 +326,10 @@ stack_trace_sysctl(struct ctl_table *table, int write,
 	ret = proc_dointvec(table, write, file, buffer, lenp, ppos);
 
 	if (ret || !write ||
-	    (last_stack_tracer_enabled == stack_tracer_enabled))
+	    (last_stack_tracer_enabled == !!stack_tracer_enabled))
 		goto out;
 
-	last_stack_tracer_enabled = stack_tracer_enabled;
+	last_stack_tracer_enabled = !!stack_tracer_enabled;
 
 	if (stack_tracer_enabled)
 		register_ftrace_function(&trace_ops);