author		Ingo Molnar <mingo@elte.hu>	2009-05-08 04:50:00 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-05-08 04:50:00 -0400
commit		f066a155334642b8a206eec625b1925d88c48aeb (patch)
tree		cb12975e60b70d1dae3b7397bab955de78a4d01e /kernel
parent		e7c064889606aab3569669078c69b87b2c527e72 (diff)
parent		33df4db04a79660150e1948e3296eeb451ac121b (diff)
Merge branch 'x86/urgent' into x86/xen
Conflicts:
	arch/frv/include/asm/pgtable.h
	arch/x86/include/asm/required-features.h
	arch/x86/xen/mmu.c

Merge reason: x86/xen was still on a .29 base; move it to a fresher
branch and pick up the Xen fixes as well, plus resolve the conflicts.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                      |    1
-rw-r--r--  kernel/audit_tree.c                  |    3
-rw-r--r--  kernel/exit.c                        |    2
-rw-r--r--  kernel/fork.c                        |   21
-rw-r--r--  kernel/futex.c                       |    7
-rw-r--r--  kernel/hung_task.c                   |  217
-rw-r--r--  kernel/irq/devres.c                  |   16
-rw-r--r--  kernel/irq/handle.c                  |   52
-rw-r--r--  kernel/irq/manage.c                  |  194
-rw-r--r--  kernel/irq/numa_migrate.c            |    1
-rw-r--r--  kernel/kthread.c                     |   26
-rw-r--r--  kernel/lockdep.c                     |   22
-rw-r--r--  kernel/module.c                      |    3
-rw-r--r--  kernel/mutex.c                       |    3
-rw-r--r--  kernel/panic.c                       |   12
-rw-r--r--  kernel/posix-cpu-timers.c            |   17
-rw-r--r--  kernel/power/disk.c                  |   45
-rw-r--r--  kernel/power/main.c                  |   24
-rw-r--r--  kernel/power/swap.c                  |    2
-rw-r--r--  kernel/power/user.c                  |    9
-rw-r--r--  kernel/ptrace.c                      |   27
-rw-r--r--  kernel/rcupdate.c                    |   18
-rw-r--r--  kernel/rcutree.c                     |   19
-rw-r--r--  kernel/rcutree_trace.c               |   14
-rw-r--r--  kernel/resource.c                    |   46
-rw-r--r--  kernel/sched.c                       |  166
-rw-r--r--  kernel/sched_cpupri.c                |    5
-rw-r--r--  kernel/sched_rt.c                    |   15
-rw-r--r--  kernel/slow-work.c                   |    4
-rw-r--r--  kernel/softirq.c                     |    4
-rw-r--r--  kernel/softlockup.c                  |  100
-rw-r--r--  kernel/sys.c                         |   24
-rw-r--r--  kernel/sysctl.c                      |   40
-rw-r--r--  kernel/time/clocksource.c            |    8
-rw-r--r--  kernel/time/jiffies.c                |    2
-rw-r--r--  kernel/time/tick-common.c            |   12
-rw-r--r--  kernel/time/timekeeping.c            |   12
-rw-r--r--  kernel/timer.c                       |    7
-rw-r--r--  kernel/trace/Kconfig                 |    4
-rw-r--r--  kernel/trace/blktrace.c              |   17
-rw-r--r--  kernel/trace/trace.c                 |   58
-rw-r--r--  kernel/trace/trace.h                 |    2
-rw-r--r--  kernel/trace/trace_branch.c          |    8
-rw-r--r--  kernel/trace/trace_events.c          |   12
-rw-r--r--  kernel/trace/trace_events_filter.c   |   14
-rw-r--r--  kernel/trace/trace_events_stage_2.h  |    4
-rw-r--r--  kernel/trace/trace_export.c          |    2
-rw-r--r--  kernel/trace/trace_output.c          |    2
-rw-r--r--  kernel/trace/trace_power.c           |    7
-rw-r--r--  kernel/trace/trace_sched_switch.c    |    3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c    |    8
-rw-r--r--  kernel/trace/trace_syscalls.c        |    2
-rw-r--r--  kernel/workqueue.c                   |   36
53 files changed, 967 insertions(+), 412 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dffe37e9..42423665660a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 917ab9525568..6e7351739a82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -734,9 +734,6 @@ int audit_tag_tree(char *old, char *new)
 	dentry = dget(path.dentry);
 	path_put(&path);
 
-	if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
-		follow_up(&mnt, &dentry);
-
 	list_add_tail(&list, &tagged->mnt_list);
 
 	mutex_lock(&audit_filter_mutex);
diff --git a/kernel/exit.c b/kernel/exit.c
index 32cbf2607cb0..abf9cf3b95c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
+	exit_irq_thread();
+
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765bc..b9e2edd00726 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		sig->cputimer.running = 1;
+	}
+
 	/* The timer lists. */
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 		atomic_inc(&current->signal->live);
 		return 0;
 	}
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
-	if (sig)
-		posix_cpu_timers_init_group(sig);
 
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a024bca2..eef8cd26b5e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -883,7 +883,12 @@ retry_private:
 out_unlock:
 	double_unlock_hb(hb1, hb2);
 
-	/* drop_futex_key_refs() must be called outside the spinlocks. */
+	/*
+	 * drop_futex_key_refs() must be called outside the spinlocks. During
+	 * the requeue we moved futex_q's from the hash bucket at key1 to the
+	 * one at key2 and updated their key pointer. We no longer need to
+	 * hold the references to key1.
+	 */
 	while (--drop_count >= 0)
 		drop_futex_key_refs(&key1);
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit the number of tasks checked in one batch.
+ *
+ * This value controls the preemptibility of khungtaskd, since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period, so it needs an upper bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+	sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	did_panic = 1;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+	.notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	/*
+	 * Ensure the task is not frozen.
+	 * Also skip a freshly created task that entered
+	 * TASK_UNINTERRUPTIBLE after being scheduled once but has never
+	 * been switched out: it mustn't be checked.
+	 */
+	if (unlikely(t->flags & PF_FROZEN || !switch_count))
+		return;
+
+	if (switch_count != t->last_switch_count) {
+		t->last_switch_count = switch_count;
+		return;
+	}
+	if (!sysctl_hung_task_warnings)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than 2 minutes,
+	 * complain:
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%ld seconds.\n", t->comm, t->pid, timeout);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	touch_nmi_watchdog();
+
+	if (sysctl_hung_task_panic)
+		panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * to exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+	get_task_struct(g);
+	get_task_struct(t);
+	rcu_read_unlock();
+	cond_resched();
+	rcu_read_lock();
+	put_task_struct(t);
+	put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
+ * a really long time (120 seconds by default). If that happens, print
+ * out a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+	int max_count = sysctl_hung_task_check_count;
+	int batch_count = HUNG_TASK_BATCHING;
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if (test_taint(TAINT_DIE) || did_panic)
+		return;
+
+	rcu_read_lock();
+	do_each_thread(g, t) {
+		if (!--max_count)
+			goto unlock;
+		if (!--batch_count) {
+			batch_count = HUNG_TASK_BATCHING;
+			rcu_lock_break(g, t);
+			/* Exit if t or g was unhashed during refresh. */
+			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+				goto unlock;
+		}
+		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+		if (t->state == TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, timeout);
+	} while_each_thread(g, t);
+ unlock:
+	rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+	/* timeout of 0 will disable the watchdog */
+	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+				  struct file *filp, void __user *buffer,
+				  size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	if (ret || !write)
+		goto out;
+
+	wake_up_process(watchdog_task);
+
+ out:
+	return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+	set_user_nice(current, 0);
+
+	for ( ; ; ) {
+		unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+			timeout = sysctl_hung_task_timeout_secs;
+
+		check_hung_uninterruptible_tasks(timeout);
+	}
+
+	return 0;
+}
+
+static int __init hung_task_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+	return 0;
+}
+
+module_init(hung_task_init);
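As a usage note, the timeout above is runtime-tunable through the proc path
mentioned in the printk. A minimal userspace sketch (the 60-second value is
an arbitrary illustration, not part of this patch); writing the file wakes
khungtaskd via proc_dohung_task_timeout_secs():

#include <stdio.h>

int main(void)
{
	/* Path comes from the printk above; writing wakes khungtaskd. */
	FILE *f = fopen("/proc/sys/kernel/hung_task_timeout_secs", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fprintf(f, "60\n");	/* illustrative: warn after 60s in D state */
	fclose(f);
	return 0;
}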
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
 }
 
 /**
- *	devm_request_irq - allocate an interrupt line for a managed device
+ *	devm_request_threaded_irq - allocate an interrupt line for a managed device
  *	@dev: device to request interrupt for
  *	@irq: Interrupt line to allocate
  *	@handler: Function to be called when the IRQ occurs
+ *	@thread_fn: function to be called in a threaded interrupt context. NULL
+ *		    for devices which handle everything in @handler
  *	@irqflags: Interrupt type flags
  *	@devname: An ascii name for the claiming device
  *	@dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
 *	If an IRQ allocated with this function needs to be freed
 *	separately, dev_free_irq() must be used.
 */
-int devm_request_irq(struct device *dev, unsigned int irq,
-		     irq_handler_t handler, unsigned long irqflags,
-		     const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+			      irq_handler_t handler, irq_handler_t thread_fn,
+			      unsigned long irqflags, const char *devname,
+			      void *dev_id)
 {
 	struct irq_devres *dr;
 	int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 	if (!dr)
 		return -ENOMEM;
 
-	rc = request_irq(irq, handler, irqflags, devname, dev_id);
+	rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+				  dev_id);
 	if (rc) {
 		devres_free(dr);
 		return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
 	return 0;
 }
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
 
 /**
  * devm_free_irq - free an interrupt
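For orientation, a hedged sketch of a driver adopting the managed variant;
the mydev_* names are invented for illustration and not part of this patch:

#include <linux/device.h>
#include <linux/interrupt.h>

/* Sketch only; the mydev_* names are hypothetical. */
static irqreturn_t mydev_quick_check(int irq, void *ctx)
{
	return IRQ_WAKE_THREAD;		/* defer the real work */
}

static irqreturn_t mydev_thread_fn(int irq, void *ctx)
{
	return IRQ_HANDLED;		/* sleepable work would run here */
}

static int mydev_setup(struct device *dev, unsigned int irq, void *ctx)
{
	/* The line is freed automatically via devres on unbind. */
	return devm_request_threaded_irq(dev, irq, mydev_quick_check,
					 mydev_thread_fn, 0, "mydev", ctx);
}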
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..26e08754744f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+	if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+		return;
+
+	printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+	       "but no thread function available.", irq, action->name);
+}
+
 DEFINE_TRACE(irq_handler_entry);
 DEFINE_TRACE(irq_handler_exit);
 
@@ -354,8 +363,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	WARN_ONCE(!in_irq(), "BUG: IRQ handler called from non-hardirq context!");
-
 	if (!(action->flags & IRQF_DISABLED))
 		local_irq_enable_in_hardirq();
 
@@ -363,8 +370,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);
 		trace_irq_handler_exit(irq, action, ret);
-		if (ret == IRQ_HANDLED)
+
+		switch (ret) {
+		case IRQ_WAKE_THREAD:
+			/*
+			 * Set result to handled so the spurious check
+			 * does not trigger.
+			 */
+			ret = IRQ_HANDLED;
+
+			/*
+			 * Catch drivers which return WAKE_THREAD but
+			 * did not set up a thread function
+			 */
+			if (unlikely(!action->thread_fn)) {
+				warn_no_thread(irq, action);
+				break;
+			}
+
+			/*
+			 * Wake up the handler thread for this
+			 * action. In case the thread crashed and was
+			 * killed we just pretend that we handled the
+			 * interrupt. The hardirq handler above has
+			 * disabled the device interrupt, so no irq
+			 * storm is lurking.
+			 */
+			if (likely(!test_bit(IRQTF_DIED,
+					     &action->thread_flags))) {
+				set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+				wake_up_process(action->thread);
+			}
+
+			/* Fall through to add to randomness */
+		case IRQ_HANDLED:
 			status |= action->flags;
+			break;
+
+		default:
+			break;
+		}
+
 		retval |= ret;
 		action = action->next;
 	} while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..2734eca59243 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
  */
 
 #include <linux/irq.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include "internals.h"
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
 /**
  *	synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  *	@irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
 
 		/* Oops, that failed? */
 	} while (status & IRQ_INPROGRESS);
+
+	/*
+	 * We made sure that no hardirq handler is running. Now verify
+	 * that no threaded handlers are active.
+	 */
+	wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
 }
 EXPORT_SYMBOL(synchronize_irq);
 
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
 /**
  *	irq_can_set_affinity - Check if the affinity of a given irq can be set
  *	@irq:		Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
 	return 1;
 }
 
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+	struct irqaction *action = desc->action;
+
+	while (action) {
+		if (action->thread)
+			set_cpus_allowed_ptr(action->thread, cpumask);
+		action = action->next;
+	}
+}
+
 /**
  *	irq_set_affinity - Set the irq affinity of a given irq
  *	@irq:		Interrupt to set affinity
@@ -89,10 +109,9 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-	if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) {
-		cpumask_copy(desc->affinity, cpumask);
+	if (desc->status & IRQ_MOVE_PCNTXT)
 		desc->chip->set_affinity(irq, cpumask);
-	} else {
+	else {
 		desc->status |= IRQ_MOVE_PENDING;
 		cpumask_copy(desc->pending_mask, cpumask);
 	}
@@ -100,6 +119,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	cpumask_copy(desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
+	irq_set_thread_affinity(desc, cpumask);
 	desc->status |= IRQ_AFFINITY_SET;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
@@ -150,6 +170,8 @@ int irq_select_affinity_usr(unsigned int irq)
 
 	spin_lock_irqsave(&desc->lock, flags);
 	ret = setup_affinity(irq, desc);
+	if (!ret)
+		irq_set_thread_affinity(desc, desc->affinity);
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	return ret;
@@ -401,6 +423,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	return ret;
 }
 
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_and_clear_bit(IRQTF_RUNTHREAD,
+				       &action->thread_flags)) {
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		schedule();
+	}
+	return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+	struct irqaction *action = data;
+	struct irq_desc *desc = irq_to_desc(action->irq);
+	int wake;
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+	current->irqaction = action;
+
+	while (!irq_wait_for_interrupt(action)) {
+
+		atomic_inc(&desc->threads_active);
+
+		spin_lock_irq(&desc->lock);
+		if (unlikely(desc->status & IRQ_DISABLED)) {
+			/*
+			 * CHECKME: We might need a dedicated
+			 * IRQ_THREAD_PENDING flag here, which
+			 * retriggers the thread in check_irq_resend()
+			 * but AFAICT IRQ_PENDING should be fine as it
+			 * retriggers the interrupt itself --- tglx
+			 */
+			desc->status |= IRQ_PENDING;
+			spin_unlock_irq(&desc->lock);
+		} else {
+			spin_unlock_irq(&desc->lock);
+
+			action->thread_fn(action->irq, action->dev_id);
+		}
+
+		wake = atomic_dec_and_test(&desc->threads_active);
+
+		if (wake && waitqueue_active(&desc->wait_for_threads))
+			wake_up(&desc->wait_for_threads);
+	}
+
+	/*
+	 * Clear irqaction. Otherwise exit_irq_thread() would make
+	 * fuzz about an active irq thread going into nirvana.
+	 */
+	current->irqaction = NULL;
+	return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+	struct task_struct *tsk = current;
+
+	if (!tsk->irqaction)
+		return;
+
+	printk(KERN_ERR
+	       "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+	/*
+	 * Set the THREAD DIED flag to prevent further wakeups of the
+	 * soon to be gone threaded handler.
+	 */
+	set_bit(IRQTF_DIED, &tsk->irqaction->thread_flags);
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -437,6 +543,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
+	 * Threaded handler ?
+	 */
+	if (new->thread_fn) {
+		struct task_struct *t;
+
+		t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+				   new->name);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		/*
+		 * We keep the reference to the task struct even if
+		 * the thread dies to avoid that the interrupt code
+		 * references an already freed task_struct.
+		 */
+		get_task_struct(t);
+		new->thread = t;
+		wake_up_process(t);
+	}
+
+	/*
 	 * The following block of code has to be executed atomically
 	 */
 	spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +599,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (!shared) {
 		irq_chip_set_defaults(desc->chip);
 
+		init_waitqueue_head(&desc->wait_for_threads);
+
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
 			ret = __irq_set_trigger(desc, irq,
 					new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret) {
-				spin_unlock_irqrestore(&desc->lock, flags);
-				return ret;
-			}
+			if (ret)
+				goto out_thread;
 		} else
 			compat_irq_chip_set_default_handler(desc);
 #if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +675,19 @@ mismatch:
 		dump_stack();
 	}
 #endif
+	ret = -EBUSY;
+
+out_thread:
 	spin_unlock_irqrestore(&desc->lock, flags);
-	return -EBUSY;
+	if (new->thread) {
+		struct task_struct *t = new->thread;
+
+		new->thread = NULL;
+		if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+			kthread_stop(t);
+		put_task_struct(t);
+	}
+	return ret;
 }
 
 /**
@@ -576,6 +713,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action, **action_ptr;
+	struct task_struct *irqthread;
 	unsigned long flags;
 
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +760,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		else
 			desc->chip->disable(irq);
 	}
+
+	irqthread = action->thread;
+	action->thread = NULL;
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
@@ -629,6 +771,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	/* Make sure it's not being used on another CPU: */
 	synchronize_irq(irq);
 
+	if (irqthread) {
+		if (!test_bit(IRQTF_DIED, &action->thread_flags))
+			kthread_stop(irqthread);
+		put_task_struct(irqthread);
+	}
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*
 	 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +829,12 @@ void free_irq(unsigned int irq, void *dev_id)
 EXPORT_SYMBOL(free_irq);
 
 /**
- *	request_irq - allocate an interrupt line
+ *	request_threaded_irq - allocate an interrupt line
  *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
+ *	@handler: Function to be called when the IRQ occurs.
+ *		  Primary handler for threaded interrupts
+ *	@thread_fn: Function called from the irq handler thread
+ *		    If NULL, no irq thread is created
 *	@irqflags: Interrupt type flags
 *	@devname: An ascii name for the claiming device
 *	@dev_id: A cookie passed back to the handler function
@@ -695,6 +846,15 @@ EXPORT_SYMBOL(free_irq);
 *	raises, you must take care both to initialise your hardware
 *	and to set up the interrupt handler in the right order.
 *
+ *	If you want to set up a threaded irq handler for your device
+ *	then you need to supply @handler and @thread_fn. @handler is
+ *	still called in hard interrupt context and has to check
+ *	whether the interrupt originates from the device. If yes it
+ *	needs to disable the interrupt on the device and return
+ *	IRQ_WAKE_THREAD, which will wake up the handler thread and run
+ *	@thread_fn. This split handler design is necessary to support
+ *	shared interrupts.
+ *
 *	Dev_id must be globally unique. Normally the address of the
 *	device data structure is used as the cookie. Since the handler
 *	receives this value it makes sense to use it.
@@ -710,8 +870,9 @@ EXPORT_SYMBOL(free_irq);
 *	IRQF_TRIGGER_*		Specify active edge(s) or level
 *
 */
-int request_irq(unsigned int irq, irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+			 irq_handler_t thread_fn, unsigned long irqflags,
+			 const char *devname, void *dev_id)
 {
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -759,6 +920,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		return -ENOMEM;
 
 	action->handler = handler;
+	action->thread_fn = thread_fn;
 	action->flags = irqflags;
 	action->name = devname;
 	action->dev_id = dev_id;
@@ -788,4 +950,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 #endif
 	return retval;
 }
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
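To make the split-handler design described in the kernel-doc above concrete,
here is a hedged sketch of the intended pattern; struct mydev and its
check/mask/unmask helpers are invented for illustration and are not part of
this patch:

#include <linux/interrupt.h>

/* Sketch of the split-handler pattern; everything mydev_* is hypothetical. */
struct mydev;
extern bool mydev_irq_is_mine(struct mydev *d);	/* hypothetical helper */
extern void mydev_mask_irq(struct mydev *d);	/* hypothetical helper */
extern void mydev_handle_work(struct mydev *d);	/* hypothetical, may sleep */
extern void mydev_unmask_irq(struct mydev *d);	/* hypothetical helper */

static irqreturn_t mydev_hardirq(int irq, void *dev_id)
{
	struct mydev *d = dev_id;

	if (!mydev_irq_is_mine(d))	/* shared line: not our device */
		return IRQ_NONE;

	mydev_mask_irq(d);		/* no irq storm while the thread runs */
	return IRQ_WAKE_THREAD;		/* have handle_IRQ_event() wake thread_fn */
}

static irqreturn_t mydev_thread_fn(int irq, void *dev_id)
{
	struct mydev *d = dev_id;

	mydev_handle_work(d);		/* sleepable work in the irq thread */
	mydev_unmask_irq(d);
	return IRQ_HANDLED;
}

static int mydev_setup_irq(unsigned int irq, struct mydev *d)
{
	return request_threaded_irq(irq, mydev_hardirq, mydev_thread_fn,
				    IRQF_SHARED, "mydev", d);
}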
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 243d6121e50e..44bbdcbaf8d2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 {
 	free_kstat_irqs(old_desc, desc);
+	free_desc_masks(old_desc, desc);
 	arch_free_chip_data(old_desc, desc);
 }
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 84bbadd4d021..4ebaf8519abf 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -76,6 +76,7 @@ static int kthread(void *_create)
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
+	create->result = current;
 	complete(&create->started);
 	schedule();
 
@@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create)
 
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
-	if (pid < 0) {
+	if (pid < 0)
 		create->result = ERR_PTR(pid);
-	} else {
-		struct sched_param param = { .sched_priority = 0 };
+	else
 		wait_for_completion(&create->started);
-		read_lock(&tasklist_lock);
-		create->result = find_task_by_pid_ns(pid, &init_pid_ns);
-		read_unlock(&tasklist_lock);
-		/*
-		 * root may have changed our (kthreadd's) priority or CPU mask.
-		 * The kernel thread should not inherit these properties.
-		 */
-		sched_setscheduler(create->result, SCHED_NORMAL, &param);
-		set_user_nice(create->result, KTHREAD_NICE_LEVEL);
-		set_cpus_allowed_ptr(create->result, cpu_all_mask);
-	}
 	complete(&create->done);
 }
 
@@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 	wait_for_completion(&create.done);
 
 	if (!IS_ERR(create.result)) {
+		struct sched_param param = { .sched_priority = 0 };
 		va_list args;
+
 		va_start(args, namefmt);
 		vsnprintf(create.result->comm, sizeof(create.result->comm),
 			  namefmt, args);
 		va_end(args);
+		/*
+		 * root may have changed our (kthreadd's) priority or CPU mask.
+		 * The kernel thread should not inherit these properties.
+		 */
+		sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
+		set_user_nice(create.result, KTHREAD_NICE_LEVEL);
+		set_cpus_allowed_ptr(create.result, cpu_all_mask);
 	}
 	return create.result;
 }
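For context, a hedged sketch of a typical consumer of the API touched above
(my_worker and its one-second poll interval are illustrative assumptions).
Threads created this way now start with SCHED_NORMAL, the default nice level,
and a full CPU mask regardless of how root tuned kthreadd itself:

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* Sketch only; 'my_worker' is a hypothetical example thread. */
static int my_worker(void *data)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);	/* poll once a second */
	return 0;
}

static struct task_struct *worker;

static int start_worker(void)
{
	worker = kthread_run(my_worker, NULL, "my_worker");
	return IS_ERR(worker) ? PTR_ERR(worker) : 0;
}

static void stop_worker(void)
{
	kthread_stop(worker);	/* makes kthread_should_stop() return true */
}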
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index b0f011866969..accb40cdb12a 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2490,13 +2490,20 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
 void lockdep_init_map(struct lockdep_map *lock, const char *name,
 		      struct lock_class_key *key, int subclass)
 {
-	if (unlikely(!debug_locks))
+	lock->class_cache = NULL;
+#ifdef CONFIG_LOCK_STAT
+	lock->cpu = raw_smp_processor_id();
+#endif
+
+	if (DEBUG_LOCKS_WARN_ON(!name)) {
+		lock->name = "NULL";
 		return;
+	}
+
+	lock->name = name;
 
 	if (DEBUG_LOCKS_WARN_ON(!key))
 		return;
-	if (DEBUG_LOCKS_WARN_ON(!name))
-		return;
 	/*
 	 * Sanity check, the lock-class key must be persistent:
 	 */
@@ -2505,12 +2512,11 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 		DEBUG_LOCKS_WARN_ON(1);
 		return;
 	}
-	lock->name = name;
 	lock->key = key;
-	lock->class_cache = NULL;
-#ifdef CONFIG_LOCK_STAT
-	lock->cpu = raw_smp_processor_id();
-#endif
+
+	if (unlikely(!debug_locks))
+		return;
+
 	if (subclass)
 		register_lock_class(lock, subclass, 1);
 }
diff --git a/kernel/module.c b/kernel/module.c
index 05f014efa32c..e797812a4d95 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2388,6 +2388,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
 	blocking_notifier_call_chain(&module_notify_list,
 			MODULE_STATE_LIVE, mod);
 
+	/* We need to finish all async code before the module init sequence is done */
+	async_synchronize_full();
+
 	mutex_lock(&module_mutex);
 	/* Drop initial reference. */
 	module_put(mod);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5d79781394a3..507cf2b5e9f1 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -148,7 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
 	preempt_disable();
 	mutex_acquire(&lock->dep_map, subclass, 0, ip);
-#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES)
+#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \
+    !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES)
 	/*
 	 * Optimistic spinning.
 	 *
diff --git a/kernel/panic.c b/kernel/panic.c
index 3fd8c5bf8b39..3dcaa1661357 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -213,8 +213,16 @@ unsigned long get_taint(void)
 
 void add_taint(unsigned flag)
 {
-	/* can't trust the integrity of the kernel anymore: */
-	debug_locks = 0;
+	/*
+	 * Can't trust the integrity of the kernel anymore.
+	 * We don't call debug_locks_off() directly because the issue
+	 * is not necessarily serious enough to set oops_in_progress to 1.
+	 * Also we want to keep lockdep alive for the staging-development
+	 * and post-warning cases.
+	 */
+	if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off())
+		printk(KERN_WARNING "Disabling lock debugging due to kernel taint\n");
+
 	set_bit(flag, &tainted_mask);
 }
 EXPORT_SYMBOL(add_taint);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8e5d9a68b022..bece7c0b67b2 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
 
 	cputime = secs_to_cputime(rlim_new);
 	if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
-	    cputime_lt(current->signal->it_prof_expires, cputime)) {
+	    cputime_gt(current->signal->it_prof_expires, cputime)) {
 		spin_lock_irq(&current->sighand->siglock);
 		set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
 		spin_unlock_irq(&current->sighand->siglock);
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 		cpu->cpu = virt_ticks(p);
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = task_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(p, &cputime);
 	switch (CPUCLOCK_WHICH(which_clock)) {
 	default:
 		return -EINVAL;
 	case CPUCLOCK_PROF:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
 		break;
 	case CPUCLOCK_VIRT:
+		thread_group_cputime(p, &cputime);
 		cpu->cpu = cputime.utime;
 		break;
 	case CPUCLOCK_SCHED:
-		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		cpu->sched = thread_group_sched_runtime(p);
 		break;
 	}
 	return 0;
@@ -1419,19 +1420,19 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 	 * timer call will interfere.
 	 */
 	list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
-		int firing;
+		int cpu_firing;
+
 		spin_lock(&timer->it_lock);
 		list_del_init(&timer->it.cpu.entry);
-		firing = timer->it.cpu.firing;
+		cpu_firing = timer->it.cpu.firing;
 		timer->it.cpu.firing = 0;
 		/*
 		 * The firing flag is -1 if we collided with a reset
 		 * of the timer, which already reported this
 		 * almost-firing as an overrun. So don't generate an event.
 		 */
-		if (likely(firing >= 0)) {
+		if (likely(cpu_firing >= 0))
 			cpu_timer_fire(timer);
-		}
 		spin_unlock(&timer->it_lock);
 	}
 }
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 5f21ab2bbcdf..e71ca9cd81b2 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,6 +22,7 @@
 #include <linux/console.h>
 #include <linux/cpu.h>
 #include <linux/freezer.h>
+#include <scsi/scsi_scan.h>
 #include <asm/suspend.h>
 
 #include "power.h"
@@ -655,32 +656,42 @@ static int software_resume(void)
 	 * here to avoid lockdep complaining.
 	 */
 	mutex_lock_nested(&pm_mutex, SINGLE_DEPTH_NESTING);
+
+	if (swsusp_resume_device)
+		goto Check_image;
+
+	if (!strlen(resume_file)) {
+		error = -ENOENT;
+		goto Unlock;
+	}
+
+	pr_debug("PM: Checking image partition %s\n", resume_file);
+
+	/* Check if the device is there */
+	swsusp_resume_device = name_to_dev_t(resume_file);
 	if (!swsusp_resume_device) {
-		if (!strlen(resume_file)) {
-			mutex_unlock(&pm_mutex);
-			return -ENOENT;
-		}
 		/*
 		 * Some device discovery might still be in progress; we need
 		 * to wait for this to finish.
 		 */
 		wait_for_device_probe();
+		/*
+		 * We can't depend on SCSI devices being available after loading
+		 * one of their modules until scsi_complete_async_scans() is
+		 * called and the resume device usually is a SCSI one.
+		 */
+		scsi_complete_async_scans();
+
 		swsusp_resume_device = name_to_dev_t(resume_file);
-		pr_debug("PM: Resume from partition %s\n", resume_file);
-	} else {
-		pr_debug("PM: Resume from partition %d:%d\n",
-				MAJOR(swsusp_resume_device),
-				MINOR(swsusp_resume_device));
+		if (!swsusp_resume_device) {
+			error = -ENODEV;
+			goto Unlock;
+		}
 	}
 
-	if (noresume) {
-		/**
-		 * FIXME: If noresume is specified, we need to find the
-		 * partition and reset it back to normal swap space.
-		 */
-		mutex_unlock(&pm_mutex);
-		return 0;
-	}
+ Check_image:
+	pr_debug("PM: Resume from partition %d:%d\n",
+		 MAJOR(swsusp_resume_device), MINOR(swsusp_resume_device));
 
 	pr_debug("PM: Checking hibernation image.\n");
 	error = swsusp_check();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f172f41858bb..f99ed6a75eac 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -291,20 +291,26 @@ static int suspend_enter(suspend_state_t state)
 
 	device_pm_lock();
 
+	if (suspend_ops->prepare) {
+		error = suspend_ops->prepare();
+		if (error)
+			goto Done;
+	}
+
 	error = device_power_down(PMSG_SUSPEND);
 	if (error) {
 		printk(KERN_ERR "PM: Some devices failed to power down\n");
-		goto Done;
+		goto Platform_finish;
 	}
 
-	if (suspend_ops->prepare) {
-		error = suspend_ops->prepare();
+	if (suspend_ops->prepare_late) {
+		error = suspend_ops->prepare_late();
 		if (error)
 			goto Power_up_devices;
 	}
 
 	if (suspend_test(TEST_PLATFORM))
-		goto Platform_finish;
+		goto Platform_wake;
 
 	error = disable_nonboot_cpus();
 	if (error || suspend_test(TEST_CPUS))
@@ -326,13 +332,17 @@ static int suspend_enter(suspend_state_t state)
  Enable_cpus:
 	enable_nonboot_cpus();
 
- Platform_finish:
-	if (suspend_ops->finish)
-		suspend_ops->finish();
+ Platform_wake:
+	if (suspend_ops->wake)
+		suspend_ops->wake();
 
  Power_up_devices:
 	device_power_up(PMSG_RESUME);
 
+ Platform_finish:
+	if (suspend_ops->finish)
+		suspend_ops->finish();
+
  Done:
 	device_pm_unlock();
 
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 505f319e489c..8ba052c86d48 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
 	struct bio *bio;
 
 	bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
-	if (!bio)
-		return -ENOMEM;
 	bio->bi_sector = page_off * (PAGE_SIZE >> 9);
 	bio->bi_bdev = resume_bdev;
 	bio->bi_end_io = end_swap_bio_read;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6c85359364f2..ed97375daae9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -24,6 +24,7 @@
 #include <linux/cpu.h>
 #include <linux/freezer.h>
 #include <linux/smp_lock.h>
+#include <scsi/scsi_scan.h>
 
 #include <asm/uaccess.h>
 
@@ -92,6 +93,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 	filp->private_data = data;
 	memset(&data->handle, 0, sizeof(struct snapshot_handle));
 	if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
+		/* Hibernating. The image device should be accessible. */
 		data->swap = swsusp_resume_device ?
 			swap_type_of(swsusp_resume_device, 0, NULL) : -1;
 		data->mode = O_RDONLY;
@@ -99,6 +101,13 @@ static int snapshot_open(struct inode *inode, struct file *filp)
 		if (error)
 			pm_notifier_call_chain(PM_POST_HIBERNATION);
 	} else {
+		/*
+		 * Resuming. We may need to wait for the image device to
+		 * appear.
+		 */
+		wait_for_device_probe();
+		scsi_complete_async_scans();
+
 		data->swap = -1;
 		data->mode = O_WRONLY;
 		error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aaad0ec34194..0692ab5a0d67 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -21,9 +21,7 @@
 #include <linux/audit.h>
 #include <linux/pid_namespace.h>
 #include <linux/syscalls.h>
-
-#include <asm/pgtable.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 
 /*
@@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
 	list_add(&child->ptrace_entry, &new_parent->ptraced);
 	child->parent = new_parent;
 }
- 
+
 /*
  * Turn a tracing stop into a normal stop now, since with no tracer there
  * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
@@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
 	task_lock(task);
 	err = __ptrace_may_access(task, mode);
 	task_unlock(task);
-	return (!err ? true : false);
+	return !err;
 }
 
 int ptrace_attach(struct task_struct *task)
@@ -190,7 +188,7 @@ int ptrace_attach(struct task_struct *task)
 	/* Protect exec's credential calculations against our interference;
 	 * SUID, SGID and LSM creds get determined differently under ptrace.
 	 */
-	retval = mutex_lock_interruptible(&current->cred_exec_mutex);
+	retval = mutex_lock_interruptible(&task->cred_exec_mutex);
 	if (retval < 0)
 		goto out;
 
@@ -234,7 +232,7 @@ repeat:
 bad:
 	write_unlock_irqrestore(&tasklist_lock, flags);
 	task_unlock(task);
-	mutex_unlock(&current->cred_exec_mutex);
+	mutex_unlock(&task->cred_exec_mutex);
 out:
 	return retval;
 }
@@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
 		copied += retval;
 		src += retval;
 		dst += retval;
-		len -= retval;			
+		len -= retval;
 	}
 	return copied;
 }
@@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
 		copied += retval;
 		src += retval;
 		dst += retval;
-		len -= retval;			
+		len -= retval;
 	}
 	return copied;
 }
@@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
 		if (unlikely(!arch_has_single_step()))
 			return -EIO;
 		user_enable_single_step(child);
-	}
-	else
+	} else {
 		user_disable_single_step(child);
+	}
 
 	child->exit_code = data;
 	wake_up_process(child);
@@ -606,10 +604,11 @@ repeat:
 	ret = security_ptrace_traceme(current->parent);
 
 	/*
-	 * Set the ptrace bit in the process ptrace flags.
-	 * Then link us on our parent's ptraced list.
+	 * Check PF_EXITING to ensure ->real_parent has not passed
+	 * exit_ptrace(). Otherwise we don't report the error but
+	 * pretend ->real_parent untraces us right after return.
 	 */
-	if (!ret) {
+	if (!ret && !(current->real_parent->flags & PF_EXITING)) {
 		current->ptrace |= PT_PTRACED;
 		__ptrace_link(current, current->real_parent);
 	}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c7b8457d0d2..a967c9feb90a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -58,6 +58,10 @@ static DEFINE_MUTEX(rcu_barrier_mutex);
 static struct completion rcu_barrier_completion;
 int rcu_scheduler_active __read_mostly;
 
+static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
+static struct rcu_head rcu_migrate_head[3];
+static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
+
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
@@ -122,7 +126,10 @@ static void rcu_barrier_func(void *type)
 	}
 }
 
-static inline void wait_migrated_callbacks(void);
+static inline void wait_migrated_callbacks(void)
+{
+	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+}
 
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
@@ -179,21 +186,12 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
-static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
-static struct rcu_head rcu_migrate_head[3];
-static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
-
 static void rcu_migrate_callback(struct rcu_head *notused)
 {
 	if (atomic_dec_and_test(&rcu_migrate_type_count))
 		wake_up(&rcu_migrate_wq);
 }
 
-static inline void wait_migrated_callbacks(void)
-{
-	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
-}
-
 static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
 			unsigned long action, void *hcpu)
 {
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7f3266922572..d2a372fb0b9b 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -530,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
 	rdp->qs_pending = 1;
 	rdp->passed_quiesc = 0;
 	rdp->gpnum = rsp->gpnum;
-	rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
-				      RCU_JIFFIES_TILL_FORCE_QS;
 }
 
 /*
@@ -578,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
578 rsp->gpnum++; 576 rsp->gpnum++;
579 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 577 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
580 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 578 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
581 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
582 RCU_JIFFIES_TILL_FORCE_QS;
583 record_gp_stall_check_time(rsp); 579 record_gp_stall_check_time(rsp);
584 dyntick_record_completed(rsp, rsp->completed - 1); 580 dyntick_record_completed(rsp, rsp->completed - 1);
585 note_new_gpnum(rsp, rdp); 581 note_new_gpnum(rsp, rdp);
@@ -1055,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1055{ 1051{
1056 unsigned long flags; 1052 unsigned long flags;
1057 long lastcomp; 1053 long lastcomp;
1058 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
1059 struct rcu_node *rnp = rcu_get_root(rsp); 1054 struct rcu_node *rnp = rcu_get_root(rsp);
1060 u8 signaled; 1055 u8 signaled;
1061 1056
@@ -1066,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1066 return; /* Someone else is already on the job. */ 1061 return; /* Someone else is already on the job. */
1067 } 1062 }
1068 if (relaxed && 1063 if (relaxed &&
1069 (long)(rsp->jiffies_force_qs - jiffies) >= 0 && 1064 (long)(rsp->jiffies_force_qs - jiffies) >= 0)
1070 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
1071 goto unlock_ret; /* no emergency and done recently. */ 1065 goto unlock_ret; /* no emergency and done recently. */
1072 rsp->n_force_qs++; 1066 rsp->n_force_qs++;
1073 spin_lock(&rnp->lock); 1067 spin_lock(&rnp->lock);
1074 lastcomp = rsp->completed; 1068 lastcomp = rsp->completed;
1075 signaled = rsp->signaled; 1069 signaled = rsp->signaled;
1076 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1070 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1077 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
1078 RCU_JIFFIES_TILL_FORCE_QS;
1079 if (lastcomp == rsp->gpnum) { 1071 if (lastcomp == rsp->gpnum) {
1080 rsp->n_force_qs_ngp++; 1072 rsp->n_force_qs_ngp++;
1081 spin_unlock(&rnp->lock); 1073 spin_unlock(&rnp->lock);
@@ -1144,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1144 * If an RCU GP has gone long enough, go check for dyntick 1136 * If an RCU GP has gone long enough, go check for dyntick
1145 * idle CPUs and, if needed, send resched IPIs. 1137 * idle CPUs and, if needed, send resched IPIs.
1146 */ 1138 */
1147 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1139 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1148 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1149 force_quiescent_state(rsp, 1); 1140 force_quiescent_state(rsp, 1);
1150 1141
1151 /* 1142 /*
@@ -1230,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1230 if (unlikely(++rdp->qlen > qhimark)) { 1221 if (unlikely(++rdp->qlen > qhimark)) {
1231 rdp->blimit = LONG_MAX; 1222 rdp->blimit = LONG_MAX;
1232 force_quiescent_state(rsp, 0); 1223 force_quiescent_state(rsp, 0);
1233 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1224 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1234 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1235 force_quiescent_state(rsp, 1); 1225 force_quiescent_state(rsp, 1);
1236 local_irq_restore(flags); 1226 local_irq_restore(flags);
1237} 1227}
@@ -1290,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1290 1280
1291 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1281 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1292 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && 1282 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1293 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1283 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
1294 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
1295 return 1; 1284 return 1;
1296 1285
1297 /* nothing to do */ 1286 /* nothing to do */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4ee954f6a8d5..4b1875ba9404 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -49,14 +49,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
49{ 49{
50 if (!rdp->beenonline) 50 if (!rdp->beenonline)
51 return; 51 return;
52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", 52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
53 rdp->cpu, 53 rdp->cpu,
54 cpu_is_offline(rdp->cpu) ? '!' : ' ', 54 cpu_is_offline(rdp->cpu) ? '!' : ' ',
55 rdp->completed, rdp->gpnum, 55 rdp->completed, rdp->gpnum,
56 rdp->passed_quiesc, rdp->passed_quiesc_completed, 56 rdp->passed_quiesc, rdp->passed_quiesc_completed,
57 rdp->qs_pending, 57 rdp->qs_pending);
58 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
59 (int)(rdp->n_rcu_pending & 0xffff));
60#ifdef CONFIG_NO_HZ 58#ifdef CONFIG_NO_HZ
61 seq_printf(m, " dt=%d/%d dn=%d df=%lu", 59 seq_printf(m, " dt=%d/%d dn=%d df=%lu",
62 rdp->dynticks->dynticks, 60 rdp->dynticks->dynticks,
@@ -102,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
102{ 100{
103 if (!rdp->beenonline) 101 if (!rdp->beenonline)
104 return; 102 return;
105 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", 103 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
106 rdp->cpu, 104 rdp->cpu,
107 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", 105 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
108 rdp->completed, rdp->gpnum, 106 rdp->completed, rdp->gpnum,
109 rdp->passed_quiesc, rdp->passed_quiesc_completed, 107 rdp->passed_quiesc, rdp->passed_quiesc_completed,
110 rdp->qs_pending, 108 rdp->qs_pending);
111 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
112 rdp->n_rcu_pending);
113#ifdef CONFIG_NO_HZ 109#ifdef CONFIG_NO_HZ
114 seq_printf(m, ",%d,%d,%d,%lu", 110 seq_printf(m, ",%d,%d,%d,%lu",
115 rdp->dynticks->dynticks, 111 rdp->dynticks->dynticks,
@@ -123,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
123 119
124static int show_rcudata_csv(struct seq_file *m, void *unused) 120static int show_rcudata_csv(struct seq_file *m, void *unused)
125{ 121{
126 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); 122 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
127#ifdef CONFIG_NO_HZ 123#ifdef CONFIG_NO_HZ
128 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); 124 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
129#endif /* #ifdef CONFIG_NO_HZ */ 125#endif /* #ifdef CONFIG_NO_HZ */
diff --git a/kernel/resource.c b/kernel/resource.c
index fd5d7d574bb9..ac5f3a36923f 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -533,43 +533,21 @@ static void __init __reserve_region_with_split(struct resource *root,
533 res->end = end; 533 res->end = end;
534 res->flags = IORESOURCE_BUSY; 534 res->flags = IORESOURCE_BUSY;
535 535
536 for (;;) { 536 conflict = __request_resource(parent, res);
537 conflict = __request_resource(parent, res); 537 if (!conflict)
538 if (!conflict) 538 return;
539 break;
540 if (conflict != parent) {
541 parent = conflict;
542 if (!(conflict->flags & IORESOURCE_BUSY))
543 continue;
544 }
545
546 /* Uhhuh, that didn't work out.. */
547 kfree(res);
548 res = NULL;
549 break;
550 }
551
552 if (!res) {
553 /* failed, split and try again */
554
555 /* conflict covered whole area */
556 if (conflict->start <= start && conflict->end >= end)
557 return;
558 539
559 if (conflict->start > start) 540 /* failed, split and try again */
560 __reserve_region_with_split(root, start, conflict->start-1, name); 541 kfree(res);
561 if (!(conflict->flags & IORESOURCE_BUSY)) {
562 resource_size_t common_start, common_end;
563 542
564 common_start = max(conflict->start, start); 543 /* conflict covered whole area */
565 common_end = min(conflict->end, end); 544 if (conflict->start <= start && conflict->end >= end)
566 if (common_start < common_end) 545 return;
567 __reserve_region_with_split(root, common_start, common_end, name);
568 }
569 if (conflict->end < end)
570 __reserve_region_with_split(root, conflict->end+1, end, name);
571 }
572 546
547 if (conflict->start > start)
548 __reserve_region_with_split(root, start, conflict->start-1, name);
549 if (conflict->end < end)
550 __reserve_region_with_split(root, conflict->end+1, end, name);
573} 551}
574 552
575void __init reserve_region_with_split(struct resource *root, 553void __init reserve_region_with_split(struct resource *root,
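
The rewrite of __reserve_region_with_split() above drops the retry loop in favor of plain recursion: claim the range, and on a conflict recurse into whatever sticks out on either side of the conflicting region. A toy, compilable model of that control flow — try_claim() stands in for __request_resource() and handles a single hard-coded busy region, purely for illustration:

#include <stdio.h>

struct region { long start, end; };

static const struct region busy = { 40, 60 };   /* pre-existing reservation */

static const struct region *try_claim(long start, long end)
{
        if (end < busy.start || start > busy.end)
                return NULL;            /* no conflict: claim succeeds */
        return &busy;                   /* conflict: report the blocker */
}

static void reserve_with_split(long start, long end)
{
        const struct region *conflict = try_claim(start, end);

        if (!conflict) {
                printf("reserved [%ld, %ld]\n", start, end);
                return;
        }
        /* conflict covered whole area */
        if (conflict->start <= start && conflict->end >= end)
                return;
        if (conflict->start > start)
                reserve_with_split(start, conflict->start - 1);
        if (conflict->end < end)
                reserve_with_split(conflict->end + 1, end);
}

int main(void)
{
        reserve_with_split(0, 100);     /* prints [0, 39] and [61, 100] */
        return 0;
}
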
diff --git a/kernel/sched.c b/kernel/sched.c
index b38bd96098f6..9e0fd1ef1a47 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
1418 struct rq_iterator *iterator); 1418 struct rq_iterator *iterator);
1419#endif 1419#endif
1420 1420
1421/* Time spent by the tasks of the cpu accounting group executing in ... */
1422enum cpuacct_stat_index {
1423 CPUACCT_STAT_USER, /* ... user mode */
1424 CPUACCT_STAT_SYSTEM, /* ... kernel mode */
1425
1426 CPUACCT_STAT_NSTATS,
1427};
1428
1421#ifdef CONFIG_CGROUP_CPUACCT 1429#ifdef CONFIG_CGROUP_CPUACCT
1422static void cpuacct_charge(struct task_struct *tsk, u64 cputime); 1430static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1431static void cpuacct_update_stats(struct task_struct *tsk,
1432 enum cpuacct_stat_index idx, cputime_t val);
1423#else 1433#else
1424static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} 1434static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1435static inline void cpuacct_update_stats(struct task_struct *tsk,
1436 enum cpuacct_stat_index idx, cputime_t val) {}
1425#endif 1437#endif
1426 1438
1427static inline void inc_cpu_load(struct rq *rq, unsigned long load) 1439static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
4511EXPORT_PER_CPU_SYMBOL(kstat); 4523EXPORT_PER_CPU_SYMBOL(kstat);
4512 4524
4513/* 4525/*
4514 * Return any ns on the sched_clock that have not yet been banked in 4526 * Return any ns on the sched_clock that have not yet been accounted in
4515 * @p in case that task is currently running. 4527 * @p in case that task is currently running.
4528 *
4529 * Called with task_rq_lock() held on @rq.
4516 */ 4530 */
4531static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
4532{
4533 u64 ns = 0;
4534
4535 if (task_current(rq, p)) {
4536 update_rq_clock(rq);
4537 ns = rq->clock - p->se.exec_start;
4538 if ((s64)ns < 0)
4539 ns = 0;
4540 }
4541
4542 return ns;
4543}
4544
4517unsigned long long task_delta_exec(struct task_struct *p) 4545unsigned long long task_delta_exec(struct task_struct *p)
4518{ 4546{
4519 unsigned long flags; 4547 unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
4521 u64 ns = 0; 4549 u64 ns = 0;
4522 4550
4523 rq = task_rq_lock(p, &flags); 4551 rq = task_rq_lock(p, &flags);
4552 ns = do_task_delta_exec(p, rq);
4553 task_rq_unlock(rq, &flags);
4524 4554
4525 if (task_current(rq, p)) { 4555 return ns;
4526 u64 delta_exec; 4556}
4527 4557
4528 update_rq_clock(rq); 4558/*
4529 delta_exec = rq->clock - p->se.exec_start; 4559 * Return accounted runtime for the task.
4530 if ((s64)delta_exec > 0) 4560 * In case the task is currently running, return the runtime plus current's
4531 ns = delta_exec; 4561 * pending runtime that has not been accounted yet.
4532 } 4562 */
4563unsigned long long task_sched_runtime(struct task_struct *p)
4564{
4565 unsigned long flags;
4566 struct rq *rq;
4567 u64 ns = 0;
4568
4569 rq = task_rq_lock(p, &flags);
4570 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
4571 task_rq_unlock(rq, &flags);
4572
4573 return ns;
4574}
4575
4576/*
4577 * Return sum_exec_runtime for the thread group.
4578 * In case the task is currently running, return the sum plus current's
4579 * pending runtime that has not been accounted yet.
4580 *
4581 * Note that the thread group might have other running tasks as well,
4582 * so the return value does not include pending runtime that other
4583 * running tasks might have.
4584 */
4585unsigned long long thread_group_sched_runtime(struct task_struct *p)
4586{
4587 struct task_cputime totals;
4588 unsigned long flags;
4589 struct rq *rq;
4590 u64 ns;
4533 4591
4592 rq = task_rq_lock(p, &flags);
4593 thread_group_cputime(p, &totals);
4594 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
4534 task_rq_unlock(rq, &flags); 4595 task_rq_unlock(rq, &flags);
4535 4596
4536 return ns; 4597 return ns;
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
4559 cpustat->nice = cputime64_add(cpustat->nice, tmp); 4620 cpustat->nice = cputime64_add(cpustat->nice, tmp);
4560 else 4621 else
4561 cpustat->user = cputime64_add(cpustat->user, tmp); 4622 cpustat->user = cputime64_add(cpustat->user, tmp);
4623
4624 cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
4562 /* Account for user time used */ 4625 /* Account for user time used */
4563 acct_update_integrals(p); 4626 acct_update_integrals(p);
4564} 4627}
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4620 else 4683 else
4621 cpustat->system = cputime64_add(cpustat->system, tmp); 4684 cpustat->system = cputime64_add(cpustat->system, tmp);
4622 4685
4686 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
4687
4623 /* Account for system time used */ 4688 /* Account for system time used */
4624 acct_update_integrals(p); 4689 acct_update_integrals(p);
4625} 4690}
@@ -4667,7 +4732,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
4667 4732
4668 if (user_tick) 4733 if (user_tick)
4669 account_user_time(p, one_jiffy, one_jiffy_scaled); 4734 account_user_time(p, one_jiffy, one_jiffy_scaled);
4670 else if (p != rq->idle) 4735 else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
4671 account_system_time(p, HARDIRQ_OFFSET, one_jiffy, 4736 account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
4672 one_jiffy_scaled); 4737 one_jiffy_scaled);
4673 else 4738 else
@@ -4781,7 +4846,7 @@ void scheduler_tick(void)
4781#endif 4846#endif
4782} 4847}
4783 4848
4784unsigned long get_parent_ip(unsigned long addr) 4849notrace unsigned long get_parent_ip(unsigned long addr)
4785{ 4850{
4786 if (in_lock_functions(addr)) { 4851 if (in_lock_functions(addr)) {
4787 addr = CALLER_ADDR2; 4852 addr = CALLER_ADDR2;
@@ -7302,7 +7367,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7302 cpumask_or(groupmask, groupmask, sched_group_cpus(group)); 7367 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
7303 7368
7304 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7369 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7370
7305 printk(KERN_CONT " %s", str); 7371 printk(KERN_CONT " %s", str);
7372 if (group->__cpu_power != SCHED_LOAD_SCALE) {
7373 printk(KERN_CONT " (__cpu_power = %d)",
7374 group->__cpu_power);
7375 }
7306 7376
7307 group = group->next; 7377 group = group->next;
7308 } while (group != sd->groups); 7378 } while (group != sd->groups);
@@ -9925,6 +9995,7 @@ struct cpuacct {
9925 struct cgroup_subsys_state css; 9995 struct cgroup_subsys_state css;
9926 /* cpuusage holds pointer to a u64-type object on every cpu */ 9996 /* cpuusage holds pointer to a u64-type object on every cpu */
9927 u64 *cpuusage; 9997 u64 *cpuusage;
9998 struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
9928 struct cpuacct *parent; 9999 struct cpuacct *parent;
9929}; 10000};
9930 10001
@@ -9949,20 +10020,32 @@ static struct cgroup_subsys_state *cpuacct_create(
9949 struct cgroup_subsys *ss, struct cgroup *cgrp) 10020 struct cgroup_subsys *ss, struct cgroup *cgrp)
9950{ 10021{
9951 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 10022 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
10023 int i;
9952 10024
9953 if (!ca) 10025 if (!ca)
9954 return ERR_PTR(-ENOMEM); 10026 goto out;
9955 10027
9956 ca->cpuusage = alloc_percpu(u64); 10028 ca->cpuusage = alloc_percpu(u64);
9957 if (!ca->cpuusage) { 10029 if (!ca->cpuusage)
9958 kfree(ca); 10030 goto out_free_ca;
9959 return ERR_PTR(-ENOMEM); 10031
9960 } 10032 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10033 if (percpu_counter_init(&ca->cpustat[i], 0))
10034 goto out_free_counters;
9961 10035
9962 if (cgrp->parent) 10036 if (cgrp->parent)
9963 ca->parent = cgroup_ca(cgrp->parent); 10037 ca->parent = cgroup_ca(cgrp->parent);
9964 10038
9965 return &ca->css; 10039 return &ca->css;
10040
10041out_free_counters:
10042 while (--i >= 0)
10043 percpu_counter_destroy(&ca->cpustat[i]);
10044 free_percpu(ca->cpuusage);
10045out_free_ca:
10046 kfree(ca);
10047out:
10048 return ERR_PTR(-ENOMEM);
9966} 10049}
9967 10050
9968/* destroy an existing cpu accounting group */ 10051/* destroy an existing cpu accounting group */
@@ -9970,7 +10053,10 @@ static void
9970cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) 10053cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
9971{ 10054{
9972 struct cpuacct *ca = cgroup_ca(cgrp); 10055 struct cpuacct *ca = cgroup_ca(cgrp);
10056 int i;
9973 10057
10058 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10059 percpu_counter_destroy(&ca->cpustat[i]);
9974 free_percpu(ca->cpuusage); 10060 free_percpu(ca->cpuusage);
9975 kfree(ca); 10061 kfree(ca);
9976} 10062}
@@ -10057,6 +10143,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
10057 return 0; 10143 return 0;
10058} 10144}
10059 10145
10146static const char *cpuacct_stat_desc[] = {
10147 [CPUACCT_STAT_USER] = "user",
10148 [CPUACCT_STAT_SYSTEM] = "system",
10149};
10150
10151static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
10152 struct cgroup_map_cb *cb)
10153{
10154 struct cpuacct *ca = cgroup_ca(cgrp);
10155 int i;
10156
10157 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
10158 s64 val = percpu_counter_read(&ca->cpustat[i]);
10159 val = cputime64_to_clock_t(val);
10160 cb->fill(cb, cpuacct_stat_desc[i], val);
10161 }
10162 return 0;
10163}
10164
10060static struct cftype files[] = { 10165static struct cftype files[] = {
10061 { 10166 {
10062 .name = "usage", 10167 .name = "usage",
@@ -10067,7 +10172,10 @@ static struct cftype files[] = {
10067 .name = "usage_percpu", 10172 .name = "usage_percpu",
10068 .read_seq_string = cpuacct_percpu_seq_read, 10173 .read_seq_string = cpuacct_percpu_seq_read,
10069 }, 10174 },
10070 10175 {
10176 .name = "stat",
10177 .read_map = cpuacct_stats_show,
10178 },
10071}; 10179};
10072 10180
10073static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 10181static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10089,12 +10197,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
10089 return; 10197 return;
10090 10198
10091 cpu = task_cpu(tsk); 10199 cpu = task_cpu(tsk);
10200
10201 rcu_read_lock();
10202
10092 ca = task_ca(tsk); 10203 ca = task_ca(tsk);
10093 10204
10094 for (; ca; ca = ca->parent) { 10205 for (; ca; ca = ca->parent) {
10095 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 10206 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
10096 *cpuusage += cputime; 10207 *cpuusage += cputime;
10097 } 10208 }
10209
10210 rcu_read_unlock();
10211}
10212
10213/*
10214 * Charge the system/user time to the task's accounting group.
10215 */
10216static void cpuacct_update_stats(struct task_struct *tsk,
10217 enum cpuacct_stat_index idx, cputime_t val)
10218{
10219 struct cpuacct *ca;
10220
10221 if (unlikely(!cpuacct_subsys.active))
10222 return;
10223
10224 rcu_read_lock();
10225 ca = task_ca(tsk);
10226
10227 do {
10228 percpu_counter_add(&ca->cpustat[idx], val);
10229 ca = ca->parent;
10230 } while (ca);
10231 rcu_read_unlock();
10098} 10232}
10099 10233
10100struct cgroup_subsys cpuacct_subsys = { 10234struct cgroup_subsys cpuacct_subsys = {
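
cpuacct_create() above grows the classic reverse-unwind error path: the percpu counters are initialized in a loop, and on the first failure only the ones already set up are destroyed, in reverse order, via while (--i >= 0). A minimal sketch of the idiom with ordinary heap allocations in place of percpu counters (counter_init() and counter_destroy() are made-up stand-ins):

#include <stdlib.h>

#define NSTATS 2

struct counter { long *buf; };

static int counter_init(struct counter *c)
{
        c->buf = calloc(16, sizeof(long));
        return c->buf ? 0 : -1;
}

static void counter_destroy(struct counter *c)
{
        free(c->buf);
}

static struct counter *create_stats(void)
{
        struct counter *stats = calloc(NSTATS, sizeof(*stats));
        int i;

        if (!stats)
                goto out;
        for (i = 0; i < NSTATS; i++)
                if (counter_init(&stats[i]))
                        goto out_free_counters;
        return stats;

out_free_counters:
        while (--i >= 0)                /* unwind only what succeeded */
                counter_destroy(&stats[i]);
        free(stats);
out:
        return NULL;
}
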
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 1e00bfacf9b8..cdd3c89574cd 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
55 * cpupri_find - find the best (lowest-pri) CPU in the system 55 * cpupri_find - find the best (lowest-pri) CPU in the system
56 * @cp: The cpupri context 56 * @cp: The cpupri context
57 * @p: The task 57 * @p: The task
58 * @lowest_mask: A mask to fill in with selected CPUs 58 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
59 * 59 *
60 * Note: This function returns the recommended CPUs as calculated during the 60 * Note: This function returns the recommended CPUs as calculated during the
61 * current invocation. By the time the call returns, the CPUs may have in 61 * current invocation. By the time the call returns, the CPUs may have in
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 continue; 82 continue;
83 83
84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 84 if (lowest_mask)
85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
85 return 1; 86 return 1;
86 } 87 }
87 88
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 299d012b4394..f2c66f8f9712 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
948 948
949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
950{ 950{
951 cpumask_var_t mask;
952
953 if (rq->curr->rt.nr_cpus_allowed == 1) 951 if (rq->curr->rt.nr_cpus_allowed == 1)
954 return; 952 return;
955 953
956 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
957 return;
958
959 if (p->rt.nr_cpus_allowed != 1 954 if (p->rt.nr_cpus_allowed != 1
960 && cpupri_find(&rq->rd->cpupri, p, mask)) 955 && cpupri_find(&rq->rd->cpupri, p, NULL))
961 goto free; 956 return;
962 957
963 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) 958 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
964 goto free; 959 return;
965 960
966 /* 961 /*
967 * There appear to be other cpus that can accept 962 * There appear to be other cpus that can accept
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
970 */ 965 */
971 requeue_task_rt(rq, p, 1); 966 requeue_task_rt(rq, p, 1);
972 resched_task(rq->curr); 967 resched_task(rq->curr);
973free:
974 free_cpumask_var(mask);
975} 968}
976 969
977#endif /* CONFIG_SMP */ 970#endif /* CONFIG_SMP */
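
Taken together, the sched_cpupri.c and sched_rt.c hunks let check_preempt_equal_prio() ask "does a suitable CPU exist?" without allocating a cpumask at all: cpupri_find() now treats the output mask as optional. A small sketch of the optional-output-parameter idiom (find_candidates() is a made-up stand-in, not a kernel function):

#include <stdbool.h>
#include <stdio.h>

static bool find_candidates(unsigned long *mask_out)
{
        unsigned long candidates = 0xf0;        /* stand-in computation */

        if (!candidates)
                return false;
        if (mask_out)                           /* fill only on request */
                *mask_out = candidates;
        return true;
}

int main(void)
{
        unsigned long mask;

        if (find_candidates(NULL))              /* cheap existence check */
                puts("candidates exist");
        if (find_candidates(&mask))             /* full answer when needed */
                printf("mask = %#lx\n", mask);
        return 0;
}
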
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
index cf2bc01186ef..b28d19135f43 100644
--- a/kernel/slow-work.c
+++ b/kernel/slow-work.c
@@ -609,14 +609,14 @@ void slow_work_unregister_user(void)
609 if (slow_work_user_count == 0) { 609 if (slow_work_user_count == 0) {
610 printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); 610 printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
611 slow_work_threads_should_exit = true; 611 slow_work_threads_should_exit = true;
612 del_timer_sync(&slow_work_cull_timer);
613 del_timer_sync(&slow_work_oom_timer);
612 wake_up_all(&slow_work_thread_wq); 614 wake_up_all(&slow_work_thread_wq);
613 wait_for_completion(&slow_work_last_thread_exited); 615 wait_for_completion(&slow_work_last_thread_exited);
614 printk(KERN_NOTICE "Slow work thread pool:" 616 printk(KERN_NOTICE "Slow work thread pool:"
615 " Shut down complete\n"); 617 " Shut down complete\n");
616 } 618 }
617 619
618 del_timer_sync(&slow_work_cull_timer);
619
620 mutex_unlock(&slow_work_user_lock); 620 mutex_unlock(&slow_work_user_lock);
621} 621}
622EXPORT_SYMBOL(slow_work_unregister_user); 622EXPORT_SYMBOL(slow_work_unregister_user);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2fecefacdc5b..b525dd348511 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -472,9 +472,9 @@ void tasklet_kill(struct tasklet_struct *t)
472 printk("Attempt to kill tasklet from interrupt\n"); 472 printk("Attempt to kill tasklet from interrupt\n");
473 473
474 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { 474 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
475 do 475 do {
476 yield(); 476 yield();
477 while (test_bit(TASKLET_STATE_SCHED, &t->state)); 477 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
478 } 478 }
479 tasklet_unlock_wait(t); 479 tasklet_unlock_wait(t);
480 clear_bit(TASKLET_STATE_SCHED, &t->state); 480 clear_bit(TASKLET_STATE_SCHED, &t->state);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sys.c b/kernel/sys.c
index 51dbb55604e8..e7998cf31498 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -360,6 +360,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
360 void __user *, arg) 360 void __user *, arg)
361{ 361{
362 char buffer[256]; 362 char buffer[256];
363 int ret = 0;
363 364
364 /* We only trust the superuser with rebooting the system. */ 365 /* We only trust the superuser with rebooting the system. */
365 if (!capable(CAP_SYS_BOOT)) 366 if (!capable(CAP_SYS_BOOT))
@@ -397,7 +398,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
397 kernel_halt(); 398 kernel_halt();
398 unlock_kernel(); 399 unlock_kernel();
399 do_exit(0); 400 do_exit(0);
400 break; 401 panic("cannot halt");
401 402
402 case LINUX_REBOOT_CMD_POWER_OFF: 403 case LINUX_REBOOT_CMD_POWER_OFF:
403 kernel_power_off(); 404 kernel_power_off();
@@ -417,29 +418,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
417 418
418#ifdef CONFIG_KEXEC 419#ifdef CONFIG_KEXEC
419 case LINUX_REBOOT_CMD_KEXEC: 420 case LINUX_REBOOT_CMD_KEXEC:
420 { 421 ret = kernel_kexec();
421 int ret; 422 break;
422 ret = kernel_kexec();
423 unlock_kernel();
424 return ret;
425 }
426#endif 423#endif
427 424
428#ifdef CONFIG_HIBERNATION 425#ifdef CONFIG_HIBERNATION
429 case LINUX_REBOOT_CMD_SW_SUSPEND: 426 case LINUX_REBOOT_CMD_SW_SUSPEND:
430 { 427 ret = hibernate();
431 int ret = hibernate(); 428 break;
432 unlock_kernel();
433 return ret;
434 }
435#endif 429#endif
436 430
437 default: 431 default:
438 unlock_kernel(); 432 ret = -EINVAL;
439 return -EINVAL; 433 break;
440 } 434 }
441 unlock_kernel(); 435 unlock_kernel();
442 return 0; 436 return ret;
443} 437}
444 438
445static void deferred_cad(struct work_struct *dummy) 439static void deferred_cad(struct work_struct *dummy)
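
The sys_reboot() rework above is a single-exit refactor: rather than pairing every early return with its own unlock_kernel(), each branch sets ret and falls through to one shared unlock. A sketch of the shape, with a pthread mutex standing in for the big kernel lock (do_command() and the command numbers are illustrative only):

#include <pthread.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

static int do_work_that_can_fail(void)
{
        return 0;                               /* stand-in for kernel_kexec() etc. */
}

static int do_command(int cmd)
{
        int ret = 0;

        pthread_mutex_lock(&big_lock);
        switch (cmd) {
        case 0:
                ret = do_work_that_can_fail();
                break;
        default:
                ret = -22;                      /* like -EINVAL */
                break;
        }
        pthread_mutex_unlock(&big_lock);        /* exactly one unlock path */
        return ret;
}
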
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 72eb1a41dcab..ea78fa101ad6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -103,6 +103,9 @@ static unsigned long one_ul = 1;
103static int one_hundred = 100; 103static int one_hundred = 100;
104static int one_thousand = 1000; 104static int one_thousand = 1000;
105 105
106/* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
107static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
108
106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 109/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
107static int maxolduid = 65535; 110static int maxolduid = 65535;
108static int minolduid; 111static int minolduid;
@@ -814,6 +817,19 @@ static struct ctl_table kern_table[] = {
814 .extra1 = &neg_one, 817 .extra1 = &neg_one,
815 .extra2 = &sixty, 818 .extra2 = &sixty,
816 }, 819 },
820#endif
821#ifdef CONFIG_DETECT_HUNG_TASK
822 {
823 .ctl_name = CTL_UNNUMBERED,
824 .procname = "hung_task_panic",
825 .data = &sysctl_hung_task_panic,
826 .maxlen = sizeof(int),
827 .mode = 0644,
828 .proc_handler = &proc_dointvec_minmax,
829 .strategy = &sysctl_intvec,
830 .extra1 = &zero,
831 .extra2 = &one,
832 },
817 { 833 {
818 .ctl_name = CTL_UNNUMBERED, 834 .ctl_name = CTL_UNNUMBERED,
819 .procname = "hung_task_check_count", 835 .procname = "hung_task_check_count",
@@ -829,7 +845,7 @@ static struct ctl_table kern_table[] = {
829 .data = &sysctl_hung_task_timeout_secs, 845 .data = &sysctl_hung_task_timeout_secs,
830 .maxlen = sizeof(unsigned long), 846 .maxlen = sizeof(unsigned long),
831 .mode = 0644, 847 .mode = 0644,
832 .proc_handler = &proc_doulongvec_minmax, 848 .proc_handler = &proc_dohung_task_timeout_secs,
833 .strategy = &sysctl_intvec, 849 .strategy = &sysctl_intvec,
834 }, 850 },
835 { 851 {
@@ -889,16 +905,6 @@ static struct ctl_table kern_table[] = {
889 .proc_handler = &proc_dointvec, 905 .proc_handler = &proc_dointvec,
890 }, 906 },
891#endif 907#endif
892#ifdef CONFIG_UNEVICTABLE_LRU
893 {
894 .ctl_name = CTL_UNNUMBERED,
895 .procname = "scan_unevictable_pages",
896 .data = &scan_unevictable_pages,
897 .maxlen = sizeof(scan_unevictable_pages),
898 .mode = 0644,
899 .proc_handler = &scan_unevictable_handler,
900 },
901#endif
902#ifdef CONFIG_SLOW_WORK 908#ifdef CONFIG_SLOW_WORK
903 { 909 {
904 .ctl_name = CTL_UNNUMBERED, 910 .ctl_name = CTL_UNNUMBERED,
@@ -1003,7 +1009,7 @@ static struct ctl_table vm_table[] = {
1003 .mode = 0644, 1009 .mode = 0644,
1004 .proc_handler = &dirty_bytes_handler, 1010 .proc_handler = &dirty_bytes_handler,
1005 .strategy = &sysctl_intvec, 1011 .strategy = &sysctl_intvec,
1006 .extra1 = &one_ul, 1012 .extra1 = &dirty_bytes_min,
1007 }, 1013 },
1008 { 1014 {
1009 .procname = "dirty_writeback_centisecs", 1015 .procname = "dirty_writeback_centisecs",
@@ -1289,6 +1295,16 @@ static struct ctl_table vm_table[] = {
1289 .extra2 = &one, 1295 .extra2 = &one,
1290 }, 1296 },
1291#endif 1297#endif
1298#ifdef CONFIG_UNEVICTABLE_LRU
1299 {
1300 .ctl_name = CTL_UNNUMBERED,
1301 .procname = "scan_unevictable_pages",
1302 .data = &scan_unevictable_pages,
1303 .maxlen = sizeof(scan_unevictable_pages),
1304 .mode = 0644,
1305 .proc_handler = &scan_unevictable_handler,
1306 },
1307#endif
1292/* 1308/*
1293 * NOTE: do not add new entries to this table unless you have read 1309 * NOTE: do not add new entries to this table unless you have read
1294 * Documentation/sysctl/ctl_unnumbered.txt 1310 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c46c931a7fe7..ecfd7b5187e0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data)
181 181
182 resumed = test_and_clear_bit(0, &watchdog_resumed); 182 resumed = test_and_clear_bit(0, &watchdog_resumed);
183 183
184 wdnow = watchdog->read(); 184 wdnow = watchdog->read(watchdog);
185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); 185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
186 watchdog_last = wdnow; 186 watchdog_last = wdnow;
187 187
188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { 188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
189 csnow = cs->read(); 189 csnow = cs->read(cs);
190 190
191 if (unlikely(resumed)) { 191 if (unlikely(resumed)) {
192 cs->wd_last = csnow; 192 cs->wd_last = csnow;
@@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
247 247
248 list_add(&cs->wd_list, &watchdog_list); 248 list_add(&cs->wd_list, &watchdog_list);
249 if (!started && watchdog) { 249 if (!started && watchdog) {
250 watchdog_last = watchdog->read(); 250 watchdog_last = watchdog->read(watchdog);
251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; 251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
252 add_timer_on(&watchdog_timer, 252 add_timer_on(&watchdog_timer,
253 cpumask_first(cpu_online_mask)); 253 cpumask_first(cpu_online_mask));
@@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG; 268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG;
269 /* Start if list is not empty */ 269 /* Start if list is not empty */
270 if (!list_empty(&watchdog_list)) { 270 if (!list_empty(&watchdog_list)) {
271 watchdog_last = watchdog->read(); 271 watchdog_last = watchdog->read(watchdog);
272 watchdog_timer.expires = 272 watchdog_timer.expires =
273 jiffies + WATCHDOG_INTERVAL; 273 jiffies + WATCHDOG_INTERVAL;
274 add_timer_on(&watchdog_timer, 274 add_timer_on(&watchdog_timer,
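
The clocksource hunks above follow an interface change: the read() hook now receives the clocksource itself, so one callback can serve several instances through per-instance state instead of globals. A self-contained sketch of the self-parameter callback style (struct clocksource_like and read_counter() are invented for the example):

#include <stdio.h>

struct clocksource_like {
        const char *name;
        unsigned long long counter;
        unsigned long long (*read)(struct clocksource_like *cs);
};

static unsigned long long read_counter(struct clocksource_like *cs)
{
        return cs->counter++;   /* per-instance state, no globals needed */
}

int main(void)
{
        struct clocksource_like a = { "a", 100, read_counter };
        struct clocksource_like b = { "b", 999, read_counter };

        printf("%s=%llu %s=%llu\n", a.name, a.read(&a), b.name, b.read(&b));
        return 0;
}
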
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 06f197560f3b..c3f6c30816e3 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -50,7 +50,7 @@
50 */ 50 */
51#define JIFFIES_SHIFT 8 51#define JIFFIES_SHIFT 8
52 52
53static cycle_t jiffies_read(void) 53static cycle_t jiffies_read(struct clocksource *cs)
54{ 54{
55 return (cycle_t) jiffies; 55 return (cycle_t) jiffies;
56} 56}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 21a5ca849514..83c4417b6a3c 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -93,7 +93,17 @@ void tick_handle_periodic(struct clock_event_device *dev)
93 for (;;) { 93 for (;;) {
94 if (!clockevents_program_event(dev, next, ktime_get())) 94 if (!clockevents_program_event(dev, next, ktime_get()))
95 return; 95 return;
96 tick_periodic(cpu); 96 /*
97 * Have to be careful here. If we're in oneshot mode,
98 * before we call tick_periodic() in a loop, we need
99 * to be sure we're using a real hardware clocksource.
100 * Otherwise we could get trapped in an infinite
101 * loop, as the tick_periodic() increments jiffies,
102 * when then will increment time, posibly causing
103 * the loop to trigger again and again.
104 */
105 if (timekeeping_valid_for_hres())
106 tick_periodic(cpu);
97 next = ktime_add(next, tick_period); 107 next = ktime_add(next, tick_period);
98 } 108 }
99} 109}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 900f1b6598d1..687dff49f6e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday);
182 */ 182 */
183static void change_clocksource(void) 183static void change_clocksource(void)
184{ 184{
185 struct clocksource *new; 185 struct clocksource *new, *old;
186 186
187 new = clocksource_get_next(); 187 new = clocksource_get_next();
188 188
@@ -191,11 +191,16 @@ static void change_clocksource(void)
191 191
192 clocksource_forward_now(); 192 clocksource_forward_now();
193 193
194 new->raw_time = clock->raw_time; 194 if (clocksource_enable(new))
195 return;
195 196
197 new->raw_time = clock->raw_time;
198 old = clock;
196 clock = new; 199 clock = new;
200 clocksource_disable(old);
201
197 clock->cycle_last = 0; 202 clock->cycle_last = 0;
198 clock->cycle_last = clocksource_read(new); 203 clock->cycle_last = clocksource_read(clock);
199 clock->error = 0; 204 clock->error = 0;
200 clock->xtime_nsec = 0; 205 clock->xtime_nsec = 0;
201 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 206 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -292,6 +297,7 @@ void __init timekeeping_init(void)
292 ntp_init(); 297 ntp_init();
293 298
294 clock = clocksource_get_next(); 299 clock = clocksource_get_next();
300 clocksource_enable(clock);
295 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 301 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
296 clock->cycle_last = clocksource_read(clock); 302 clock->cycle_last = clocksource_read(clock);
297 303
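
change_clocksource() above also adopts a safer swap order: enable the new source first, commit the switch only if that succeeds, and disable the old source last, so a failed enable leaves timekeeping on the current clock. Schematically (all names below are illustrative, not kernel API):

struct source {
        int enabled;
};

static int source_enable(struct source *s)
{
        s->enabled = 1;
        return 0;                       /* a real driver could fail here */
}

static void source_disable(struct source *s)
{
        s->enabled = 0;
}

static struct source *cur;

static void switch_source(struct source *next)
{
        struct source *old;

        if (source_enable(next))        /* on failure, keep the old source */
                return;
        old = cur;                      /* mirrors: old = clock; clock = new; */
        cur = next;
        if (old)
                source_disable(old);    /* disable only after the switch */
}
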
diff --git a/kernel/timer.c b/kernel/timer.c
index b4555568b4e4..cffffad01c31 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer,
531} 531}
532 532
533/** 533/**
534 * init_timer - initialize a timer. 534 * init_timer_key - initialize a timer
535 * @timer: the timer to be initialized 535 * @timer: the timer to be initialized
536 * @name: name of the timer
537 * @key: lockdep class key of the fake lock used for tracking timer
538 * sync lock dependencies
536 * 539 *
537 * init_timer() must be done to a timer prior to calling *any* of the 540 * init_timer_key() must be done to a timer prior to calling *any* of the
538 * other timer functions. 541 * other timer functions.
539 */ 542 */
540void init_timer_key(struct timer_list *timer, 543void init_timer_key(struct timer_list *timer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2246141bda4d..417d1985e299 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -312,7 +312,7 @@ config KMEMTRACE
312 and profile kernel code. 312 and profile kernel code.
313 313
314 This requires a userspace application to use. See 314 This requires a userspace application to use. See
315 Documentation/vm/kmemtrace.txt for more information. 315 Documentation/trace/kmemtrace.txt for more information.
316 316
317 Saying Y will make the kernel somewhat larger and slower. However, 317 Saying Y will make the kernel somewhat larger and slower. However,
318 if you disable kmemtrace at run-time or boot-time, the performance 318 if you disable kmemtrace at run-time or boot-time, the performance
@@ -403,7 +403,7 @@ config MMIOTRACE
403 implementation and works via page faults. Tracing is disabled by 403 implementation and works via page faults. Tracing is disabled by
404 default and can be enabled at run-time. 404 default and can be enabled at run-time.
405 405
406 See Documentation/tracers/mmiotrace.txt. 406 See Documentation/trace/mmiotrace.txt.
407 If you are not helping to develop drivers, say N. 407 If you are not helping to develop drivers, say N.
408 408
409config MMIOTRACE_TEST 409config MMIOTRACE_TEST
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3f90c4..921ef5d1f0ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
327 char *msg; 327 char *msg;
328 struct blk_trace *bt; 328 struct blk_trace *bt;
329 329
330 if (count > BLK_TN_MAX_MSG) 330 if (count >= BLK_TN_MAX_MSG)
331 return -EINVAL; 331 return -EINVAL;
332 332
333 msg = kmalloc(count, GFP_KERNEL); 333 msg = kmalloc(count + 1, GFP_KERNEL);
334 if (msg == NULL) 334 if (msg == NULL)
335 return -ENOMEM; 335 return -ENOMEM;
336 336
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
339 return -EFAULT; 339 return -EFAULT;
340 } 340 }
341 341
342 msg[count] = '\0';
342 bt = filp->private_data; 343 bt = filp->private_data;
343 __trace_note_message(bt, "%s", msg); 344 __trace_note_message(bt, "%s", msg);
344 kfree(msg); 345 kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
642 if (blk_pc_request(rq)) { 643 if (blk_pc_request(rq)) {
643 what |= BLK_TC_ACT(BLK_TC_PC); 644 what |= BLK_TC_ACT(BLK_TC_PC);
644 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, 645 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
645 sizeof(rq->cmd), rq->cmd); 646 rq->cmd_len, rq->cmd);
646 } else { 647 } else {
647 what |= BLK_TC_ACT(BLK_TC_FS); 648 what |= BLK_TC_ACT(BLK_TC_FS);
648 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, 649 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
@@ -1376,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
1376{ 1377{
1377 int i; 1378 int i;
1378 int mask = 0; 1379 int mask = 0;
1379 char *s, *token; 1380 char *buf, *s, *token;
1380 1381
1381 s = kstrdup(str, GFP_KERNEL); 1382 buf = kstrdup(str, GFP_KERNEL);
1382 if (s == NULL) 1383 if (buf == NULL)
1383 return -ENOMEM; 1384 return -ENOMEM;
1384 s = strstrip(s); 1385 s = strstrip(buf);
1385 1386
1386 while (1) { 1387 while (1) {
1387 token = strsep(&s, ","); 1388 token = strsep(&s, ",");
@@ -1402,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
1402 break; 1403 break;
1403 } 1404 }
1404 } 1405 }
1405 kfree(s); 1406 kfree(buf);
1406 1407
1407 return mask; 1408 return mask;
1408} 1409}
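
Two small but classic fixes in blktrace.c above: blk_msg_write() now reserves room for and writes a terminating NUL, and blk_trace_str2mask() keeps the original kstrdup() pointer for kfree(), because strstrip() may return a pointer into the middle of the allocation. A userspace sketch of the second fix (strstrip_ws() approximates the kernel's strstrip()):

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

static char *strstrip_ws(char *s)
{
        size_t len = strlen(s);

        while (len && isspace((unsigned char)s[len - 1]))
                s[--len] = '\0';
        while (*s && isspace((unsigned char)*s))
                s++;                    /* may advance past the allocation start */
        return s;
}

static int parse(const char *str)
{
        char *buf, *s;
        int ret;

        buf = strdup(str);              /* keep this pointer for free() */
        if (!buf)
                return -1;
        s = strstrip_ws(buf);
        ret = *s ? 0 : -1;              /* ... tokenize s here ... */
        free(buf);                      /* free(s) could be undefined behavior */
        return ret;
}
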
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a40c563..a884c09006c4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h> 31#include <linux/splice.h>
32#include <linux/kdebug.h> 32#include <linux/kdebug.h>
33#include <linux/string.h>
33#include <linux/ctype.h> 34#include <linux/ctype.h>
34#include <linux/init.h> 35#include <linux/init.h>
35#include <linux/poll.h> 36#include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
147} 148}
148__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 149__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
149 150
150long 151unsigned long long ns2usecs(cycle_t nsec)
151ns2usecs(cycle_t nsec)
152{ 152{
153 nsec += 500; 153 nsec += 500;
154 do_div(nsec, 1000); 154 do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1632 return; 1632 return;
1633 1633
1634 cpumask_set_cpu(iter->cpu, iter->started); 1634 cpumask_set_cpu(iter->cpu, iter->started);
1635 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1635
1636 /* Don't print started cpu buffer for the first entry of the trace */
1637 if (iter->idx > 1)
1638 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
1639 iter->cpu);
1636} 1640}
1637 1641
1638static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1642static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
1867 if (current_trace) 1871 if (current_trace)
1868 *iter->trace = *current_trace; 1872 *iter->trace = *current_trace;
1869 1873
1874 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
1875 goto fail;
1876
1877 cpumask_clear(iter->started);
1878
1870 if (current_trace && current_trace->print_max) 1879 if (current_trace && current_trace->print_max)
1871 iter->tr = &max_tr; 1880 iter->tr = &max_tr;
1872 else 1881 else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
1917 if (iter->buffer_iter[cpu]) 1926 if (iter->buffer_iter[cpu])
1918 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1927 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1919 } 1928 }
1929 free_cpumask_var(iter->started);
1920 fail: 1930 fail:
1921 mutex_unlock(&trace_types_lock); 1931 mutex_unlock(&trace_types_lock);
1922 kfree(iter->trace); 1932 kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
1960 1970
1961 seq_release(inode, file); 1971 seq_release(inode, file);
1962 mutex_destroy(&iter->mutex); 1972 mutex_destroy(&iter->mutex);
1973 free_cpumask_var(iter->started);
1963 kfree(iter->trace); 1974 kfree(iter->trace);
1964 kfree(iter); 1975 kfree(iter);
1965 return 0; 1976 return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
2358 "# mkdir /debug\n" 2369 "# mkdir /debug\n"
2359 "# mount -t debugfs nodev /debug\n\n" 2370 "# mount -t debugfs nodev /debug\n\n"
2360 "# cat /debug/tracing/available_tracers\n" 2371 "# cat /debug/tracing/available_tracers\n"
2361 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" 2372 "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2362 "# cat /debug/tracing/current_tracer\n" 2373 "# cat /debug/tracing/current_tracer\n"
2363 "none\n" 2374 "nop\n"
2364 "# echo sched_switch > /debug/tracing/current_tracer\n" 2375 "# echo sched_switch > /debug/tracing/current_tracer\n"
2365 "# cat /debug/tracing/current_tracer\n" 2376 "# cat /debug/tracing/current_tracer\n"
2366 "sched_switch\n" 2377 "sched_switch\n"
@@ -3266,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
3266 3277
3267 info->tr = &global_trace; 3278 info->tr = &global_trace;
3268 info->cpu = cpu; 3279 info->cpu = cpu;
3269 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3280 info->spare = NULL;
3270 /* Force reading ring buffer for first read */ 3281 /* Force reading ring buffer for first read */
3271 info->read = (unsigned int)-1; 3282 info->read = (unsigned int)-1;
3272 if (!info->spare)
3273 goto out;
3274 3283
3275 filp->private_data = info; 3284 filp->private_data = info;
3276 3285
3277 return 0; 3286 return nonseekable_open(inode, filp);
3278
3279 out:
3280 kfree(info);
3281 return -ENOMEM;
3282} 3287}
3283 3288
3284static ssize_t 3289static ssize_t
@@ -3293,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3293 if (!count) 3298 if (!count)
3294 return 0; 3299 return 0;
3295 3300
3301 if (!info->spare)
3302 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3303 if (!info->spare)
3304 return -ENOMEM;
3305
3296 /* Do we have previous read data to read? */ 3306 /* Do we have previous read data to read? */
3297 if (info->read < PAGE_SIZE) 3307 if (info->read < PAGE_SIZE)
3298 goto read; 3308 goto read;
@@ -3331,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
3331{ 3341{
3332 struct ftrace_buffer_info *info = file->private_data; 3342 struct ftrace_buffer_info *info = file->private_data;
3333 3343
3334 ring_buffer_free_read_page(info->tr->buffer, info->spare); 3344 if (info->spare)
3345 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3335 kfree(info); 3346 kfree(info);
3336 3347
3337 return 0; 3348 return 0;
@@ -3417,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3417 int size, i; 3428 int size, i;
3418 size_t ret; 3429 size_t ret;
3419 3430
3420 /* 3431 if (*ppos & (PAGE_SIZE - 1)) {
3421 * We can't seek on a buffer input 3432 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3422 */ 3433 return -EINVAL;
3423 if (unlikely(*ppos)) 3434 }
3424 return -ESPIPE;
3425 3435
3436 if (len & (PAGE_SIZE - 1)) {
3437 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3438 if (len < PAGE_SIZE)
3439 return -EINVAL;
3440 len &= PAGE_MASK;
3441 }
3426 3442
3427 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { 3443 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
3428 struct page *page; 3444 struct page *page;
3429 int r; 3445 int r;
3430 3446
@@ -3432,6 +3448,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3432 if (!ref) 3448 if (!ref)
3433 break; 3449 break;
3434 3450
3451 ref->ref = 1;
3435 ref->buffer = info->tr->buffer; 3452 ref->buffer = info->tr->buffer;
3436 ref->page = ring_buffer_alloc_read_page(ref->buffer); 3453 ref->page = ring_buffer_alloc_read_page(ref->buffer);
3437 if (!ref->page) { 3454 if (!ref->page) {
@@ -3463,6 +3480,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3463 spd.partial[i].offset = 0; 3480 spd.partial[i].offset = 0;
3464 spd.partial[i].private = (unsigned long)ref; 3481 spd.partial[i].private = (unsigned long)ref;
3465 spd.nr_pages++; 3482 spd.nr_pages++;
3483 *ppos += PAGE_SIZE;
3466 } 3484 }
3467 3485
3468 spd.nr_pages = i; 3486 spd.nr_pages = i;
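
The tracing_buffers_* changes above defer allocating info->spare until the first read: opening the file stays cheap, the allocation failure is reported where it can actually be handled, and release tolerates a buffer that was never allocated. The lazy-allocation shape, reduced to plain C (struct reader and the 4096-byte size are illustrative):

#include <stdlib.h>
#include <string.h>

struct reader {
        char *spare;            /* NULL until the first read */
};

static long reader_read(struct reader *r, char *out, size_t len)
{
        if (!r->spare) {
                r->spare = calloc(1, 4096);
                if (!r->spare)
                        return -12;     /* like -ENOMEM, reported at read time */
        }
        if (len > 4096)
                len = 4096;
        memcpy(out, r->spare, len);
        return (long)len;
}

static void reader_release(struct reader *r)
{
        free(r->spare);         /* free(NULL) is safe if nothing was ever read */
}
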
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cbc168f1e43d..e685ac2b2ba1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
602#endif /* CONFIG_FTRACE_STARTUP_TEST */ 602#endif /* CONFIG_FTRACE_STARTUP_TEST */
603 603
604extern void *head_page(struct trace_array_cpu *data); 604extern void *head_page(struct trace_array_cpu *data);
605extern long ns2usecs(cycle_t nsec); 605extern unsigned long long ns2usecs(cycle_t nsec);
606extern int 606extern int
607trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 607trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
608extern int 608extern int
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index ad8c22efff41..8333715e4066 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -155,6 +155,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter,
155 return TRACE_TYPE_HANDLED; 155 return TRACE_TYPE_HANDLED;
156} 156}
157 157
158static void branch_print_header(struct seq_file *s)
159{
160 seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT"
161 " FUNC:FILE:LINE\n");
162 seq_puts(s, "# | | | | | "
163 " |\n");
164}
158 165
159static struct trace_event trace_branch_event = { 166static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH, 167 .type = TRACE_BRANCH,
@@ -169,6 +176,7 @@ static struct tracer branch_trace __read_mostly =
169#ifdef CONFIG_FTRACE_SELFTEST 176#ifdef CONFIG_FTRACE_SELFTEST
170 .selftest = trace_selftest_startup_branch, 177 .selftest = trace_selftest_startup_branch,
171#endif /* CONFIG_FTRACE_SELFTEST */ 178#endif /* CONFIG_FTRACE_SELFTEST */
179 .print_header = branch_print_header,
172}; 180};
173 181
174__init static int init_branch_tracer(void) 182__init static int init_branch_tracer(void)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64ec4d278ffb..576f4fa2af0d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
503 503
504 if (copy_from_user(&buf, ubuf, cnt)) 504 if (copy_from_user(&buf, ubuf, cnt))
505 return -EFAULT; 505 return -EFAULT;
506 buf[cnt] = '\0';
506 507
507 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 508 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
508 if (!pred) 509 if (!pred)
@@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
520 return cnt; 521 return cnt;
521 } 522 }
522 523
523 if (filter_add_pred(call, pred)) { 524 err = filter_add_pred(call, pred);
525 if (err < 0) {
524 filter_free_pred(pred); 526 filter_free_pred(pred);
525 return -EINVAL; 527 return err;
526 } 528 }
527 529
528 *ppos += cnt; 530 *ppos += cnt;
@@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
569 571
570 if (copy_from_user(&buf, ubuf, cnt)) 572 if (copy_from_user(&buf, ubuf, cnt))
571 return -EFAULT; 573 return -EFAULT;
574 buf[cnt] = '\0';
572 575
573 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 576 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
574 if (!pred) 577 if (!pred)
@@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
586 return cnt; 589 return cnt;
587 } 590 }
588 591
589 if (filter_add_subsystem_pred(system, pred)) { 592 err = filter_add_subsystem_pred(system, pred);
593 if (err < 0) {
590 filter_free_subsystem_preds(system); 594 filter_free_subsystem_preds(system);
591 filter_free_pred(pred); 595 filter_free_pred(pred);
592 return -EINVAL; 596 return err;
593 } 597 }
594 598
595 *ppos += cnt; 599 *ppos += cnt;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 026be412f356..e03cbf1e38f3 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call,
215 } 215 }
216 } 216 }
217 217
218 return -ENOMEM; 218 return -ENOSPC;
219} 219}
220 220
221static int is_string_field(const char *type) 221static int is_string_field(const char *type)
@@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
319 } 319 }
320 320
321 if (i == MAX_FILTER_PRED) 321 if (i == MAX_FILTER_PRED)
322 return -EINVAL; 322 return -ENOSPC;
323 323
324 events_for_each(call) { 324 events_for_each(call) {
325 int err; 325 int err;
@@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
410 } 410 }
411 } 411 }
412 412
413 if (!val_str) {
414 pred->field_name = NULL;
415 return -EINVAL;
416 }
417
413 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 418 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
414 if (!pred->field_name) 419 if (!pred->field_name)
415 return -ENOMEM; 420 return -ENOMEM;
416 421
417 pred->val = simple_strtoull(val_str, &tmp, 10); 422 pred->val = simple_strtoull(val_str, &tmp, 0);
418 if (tmp == val_str) { 423 if (tmp == val_str) {
419 pred->str_val = kstrdup(val_str, GFP_KERNEL); 424 pred->str_val = kstrdup(val_str, GFP_KERNEL);
420 if (!pred->str_val) 425 if (!pred->str_val)
421 return -ENOMEM; 426 return -ENOMEM;
422 } 427 } else if (*tmp != '\0')
428 return -EINVAL;
423 429
424 return 0; 430 return 0;
425} 431}
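
filter_parse() above tightens number parsing: base 0 lets simple_strtoull() accept decimal, hex and octal, and a nonempty tail after the parsed digits now fails with -EINVAL instead of being silently ignored. The same validation with the C library's strtoull(), as a hedged sketch (parse_u64() is invented for the example; the kernel code treats a non-number as a string filter rather than an error):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int parse_u64(const char *tok, unsigned long long *val)
{
        char *end;

        errno = 0;
        *val = strtoull(tok, &end, 0);  /* 0 => "42", "0x2a", "052" all work */
        if (end == tok)
                return -1;              /* no digits at all: not a number */
        if (*end != '\0')
                return -1;              /* trailing junk, e.g. "42abc" */
        if (errno == ERANGE)
                return -1;              /* out of range */
        return 0;
}

int main(void)
{
        unsigned long long v;

        printf("%d\n", parse_u64("0x2a", &v));  /* 0: v == 42 */
        printf("%d\n", parse_u64("42abc", &v)); /* -1: trailing junk */
        return 0;
}
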
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 30743f7d4110..d363c6672c6c 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
 	return 0;
 
 #undef __entry
-#define __entry "REC"
+#define __entry REC
 
 #undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
 
 #undef TP_fast_assign
 #define TP_fast_assign(args...) args
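
Two preprocessor subtleties drive this hunk: dropping the quotes turns __entry into a real REC token that later macro stages can paste into field accesses, and __stringify(args) differs from plain #args in that the argument is macro-expanded before being stringified. A standalone illustration (the kernel's __stringify lives in linux/stringify.h; here it is redefined locally, and DIRECT/REC/rec_value are made-up names):

#include <stdio.h>

/* Same two-level trick as linux/stringify.h: the extra level forces
 * macro expansion of x before # stringifies it. */
#define __stringify_1(x...)	#x
#define __stringify(x...)	__stringify_1(x)

#define DIRECT(x...)		#x

#define REC		rec_value
#define rec_value	42

int main(void)
{
	printf("%s\n", DIRECT(REC));		/* "REC" - no expansion */
	printf("%s\n", __stringify(REC));	/* "42"  - fully expanded */
	return 0;
}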
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d3df50..07a22c33ebf3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
 
 #undef TRACE_FIELD_ZERO_CHAR
 #define TRACE_FIELD_ZERO_CHAR(item)					\
-	ret = trace_seq_printf(s, "\tfield: char " #item ";\t"		\
+	ret = trace_seq_printf(s, "\tfield:char " #item ";\t"		\
 			       "offset:%u;\tsize:0;\n",			\
 			       (unsigned int)offsetof(typeof(field), item)); \
 	if (!ret)							\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a63b247..64b54a59c55b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
 
 	trace_find_cmdline(entry->pid, comm);
 
-	ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]"
+	ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
 			       " %ld.%03ldms (+%ld.%03ldms): ", comm,
 			       entry->pid, iter->cpu, entry->flags,
 			       entry->preempt_count, iter->idx,
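
The widened conversion matters on 32-bit builds: the bracketed value is a 64-bit quantity (presumably the timestamp; the matching argument sits just past the end of this hunk), and pulling a 64-bit argument through %lx reads only half of it and throws every following vararg out of step. The portable idiom, sketched in userspace:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t ts = 0x123456789abcULL;

	/* Wrong on 32-bit targets: %lx consumes only an unsigned long. */
	/* printf("[%08lx]\n", ts); */

	/* Correct: match the 64-bit argument with a 64-bit conversion. */
	printf("[%08llx]\n", (unsigned long long)ts);
	printf("[%08" PRIx64 "]\n", ts);	/* or the inttypes.h macro */
	return 0;
}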
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index bae791ebcc51..118439709fb7 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -186,6 +186,12 @@ static enum print_line_t power_print_line(struct trace_iterator *iter)
 	return TRACE_TYPE_UNHANDLED;
 }
 
+static void power_print_header(struct seq_file *s)
+{
+	seq_puts(s, "#   TIMESTAMP      STATE  EVENT\n");
+	seq_puts(s, "#       |            |      |\n");
+}
+
 static struct tracer power_tracer __read_mostly =
 {
 	.name		= "power",
@@ -194,6 +200,7 @@ static struct tracer power_tracer __read_mostly =
 	.stop		= stop_power_trace,
 	.reset		= power_trace_reset,
 	.print_line	= power_print_line,
+	.print_header	= power_print_header,
 };
 
 static int init_power_trace(void)
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f200abd3..9117cea6f1ae 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
 	pc = preempt_count();
 	tracing_record_cmdline(current);
 
+	if (sched_stopped)
+		return;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b2ec84..5bc00e8f153e 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 	if (unlikely(!tracer_enabled || next != wakeup_task))
 		goto out_unlock;
 
-	trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
+	trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
 	tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
 	/*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
 	data = wakeup_trace->data[wakeup_cpu];
 	data->preempt_timestamp = ftrace_now(cpu);
 	tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
+
+	/*
+	 * We must be careful in using CALLER_ADDR2. But since wake_up
+	 * is not called by an assembly function (where as schedule is)
+	 * it should be safe to use it here.
+	 */
 	trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
 
 out_locked:
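
Both hunks tune which return addresses the wakeup tracer logs. In this era of the kernel, CALLER_ADDRn is (under CONFIG_FRAME_POINTER) a wrapper around __builtin_return_address(n), so ADDR0 names the site that called the current function and each step walks one frame further up the stack; the added comment records why walking two frames out of a C function such as wake_up is safe while doing so from assembly-entered code like schedule() is not. A small demonstration of the builtin (reliable only with frame pointers, e.g. gcc -O0 or -fno-omit-frame-pointer; function names are made up):

#include <stdio.h>

static void __attribute__((noinline)) probe(void)
{
	printf("ADDR0 (caller of probe): %p\n", __builtin_return_address(0));
	printf("ADDR1 (caller's caller): %p\n", __builtin_return_address(1));
}

static void __attribute__((noinline)) wakeup_path(void)
{
	probe();
}

int main(void)
{
	wakeup_path();
	return 0;
}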
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a2a3af29c943..5e579645ac86 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,5 @@
+#include <trace/syscall.h>
 #include <linux/kernel.h>
-#include <linux/ftrace.h>
 #include <asm/syscall.h>
 
 #include "trace_output.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b6b966ce1451..f71fb2a08950 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -966,20 +966,20 @@ undo:
 }
 
 #ifdef CONFIG_SMP
-static struct workqueue_struct *work_on_cpu_wq __read_mostly;
 
 struct work_for_cpu {
-	struct work_struct work;
+	struct completion completion;
 	long (*fn)(void *);
 	void *arg;
 	long ret;
 };
 
-static void do_work_for_cpu(struct work_struct *w)
+static int do_work_for_cpu(void *_wfc)
 {
-	struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work);
-
+	struct work_for_cpu *wfc = _wfc;
 	wfc->ret = wfc->fn(wfc->arg);
+	complete(&wfc->completion);
+	return 0;
 }
 
 /**
@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w)
  *
  * This will return the value @fn returns.
  * It is up to the caller to ensure that the cpu doesn't go offline.
+ * The caller must not hold any locks which would prevent @fn from completing.
  */
 long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
 {
-	struct work_for_cpu wfc;
-
-	INIT_WORK(&wfc.work, do_work_for_cpu);
-	wfc.fn = fn;
-	wfc.arg = arg;
-	queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
-	flush_work(&wfc.work);
-
+	struct task_struct *sub_thread;
+	struct work_for_cpu wfc = {
+		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
+		.fn = fn,
+		.arg = arg,
+	};
+
+	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
+	if (IS_ERR(sub_thread))
+		return PTR_ERR(sub_thread);
+	kthread_bind(sub_thread, cpu);
+	wake_up_process(sub_thread);
+	wait_for_completion(&wfc.completion);
 	return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -1016,8 +1022,4 @@ void __init init_workqueues(void)
 	hotcpu_notifier(workqueue_cpu_callback, 0);
 	keventd_wq = create_workqueue("events");
 	BUG_ON(!keventd_wq);
-#ifdef CONFIG_SMP
-	work_on_cpu_wq = create_workqueue("work_on_cpu");
-	BUG_ON(!work_on_cpu_wq);
-#endif
 }
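
The rewrite drops the dedicated work_on_cpu workqueue in favour of a throwaway kernel thread bound to the target CPU plus an on-stack completion. Because each call now gets its own thread, @fn can no longer deadlock against other work items sharing one queue, though as the new kerneldoc line says, the caller must not hold locks that @fn needs. A userspace analogue of the new shape, with pthread affinity standing in for kthread_create()/kthread_bind() and pthread_join() for the completion (a Linux-specific sketch; compile with -pthread):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

struct work_for_cpu {
	long (*fn)(void *);
	void *arg;
	long ret;
};

static void *do_work_for_cpu(void *_wfc)
{
	struct work_for_cpu *wfc = _wfc;

	wfc->ret = wfc->fn(wfc->arg);
	return NULL;
}

/* Run fn(arg) on the given CPU and wait for the result. */
static long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
{
	struct work_for_cpu wfc = { .fn = fn, .arg = arg };
	pthread_attr_t attr;
	pthread_t thread;
	cpu_set_t set;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	pthread_attr_init(&attr);
	/* bind before it runs, like kthread_bind() before wake_up_process() */
	pthread_attr_setaffinity_np(&attr, sizeof(set), &set);
	if (pthread_create(&thread, &attr, do_work_for_cpu, &wfc))
		return -1;
	pthread_join(thread, NULL);	/* plays the role of wait_for_completion() */
	pthread_attr_destroy(&attr);
	return wfc.ret;
}

static long whoami(void *arg)
{
	(void)arg;
	return sched_getcpu();
}

int main(void)
{
	printf("ran on CPU %ld\n", work_on_cpu(0, whoami, NULL));
	return 0;
}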