aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/audit_tree.c3
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c21
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/hung_task.c217
-rw-r--r--kernel/irq/devres.c16
-rw-r--r--kernel/irq/handle.c50
-rw-r--r--kernel/irq/manage.c189
-rw-r--r--kernel/irq/numa_migrate.c1
-rw-r--r--kernel/kprobes.c281
-rw-r--r--kernel/kthread.c26
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/mutex.c3
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/posix-cpu-timers.c9
-rw-r--r--kernel/power/disk.c8
-rw-r--r--kernel/power/main.c24
-rw-r--r--kernel/power/swap.c2
-rw-r--r--kernel/power/user.c9
-rw-r--r--kernel/ptrace.c23
-rw-r--r--kernel/rcupdate.c18
-rw-r--r--kernel/rcutree.c19
-rw-r--r--kernel/rcutree_trace.c14
-rw-r--r--kernel/resource.c46
-rw-r--r--kernel/sched.c164
-rw-r--r--kernel/sched_cpupri.c5
-rw-r--r--kernel/sched_rt.c15
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/softlockup.c100
-rw-r--r--kernel/sys.c24
-rw-r--r--kernel/sysctl.c58
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/time/jiffies.c2
-rw-r--r--kernel/time/timekeeping.c12
-rw-r--r--kernel/timer.c7
-rw-r--r--kernel/trace/Kconfig4
-rw-r--r--kernel/trace/blktrace.c17
-rw-r--r--kernel/trace/trace.c57
-rw-r--r--kernel/trace/trace.h2
-rw-r--r--kernel/trace/trace_branch.c8
-rw-r--r--kernel/trace/trace_events.c12
-rw-r--r--kernel/trace/trace_events_filter.c14
-rw-r--r--kernel/trace/trace_events_stage_2.h4
-rw-r--r--kernel/trace/trace_export.c2
-rw-r--r--kernel/trace/trace_output.c2
-rw-r--r--kernel/trace/trace_power.c7
-rw-r--r--kernel/trace/trace_sched_switch.c3
-rw-r--r--kernel/trace/trace_sched_wakeup.c8
-rw-r--r--kernel/trace/trace_syscalls.c2
-rw-r--r--kernel/workqueue.c36
51 files changed, 1130 insertions, 454 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index c8e1be5f0b00..a35eee3436de 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
74obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
77obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
77obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
78obj-$(CONFIG_SECCOMP) += seccomp.o 79obj-$(CONFIG_SECCOMP) += seccomp.o
79obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 80obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 917ab9525568..6e7351739a82 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -734,9 +734,6 @@ int audit_tag_tree(char *old, char *new)
734 dentry = dget(path.dentry); 734 dentry = dget(path.dentry);
735 path_put(&path); 735 path_put(&path);
736 736
737 if (dentry == tagged->mnt_root && dentry == mnt->mnt_root)
738 follow_up(&mnt, &dentry);
739
740 list_add_tail(&list, &tagged->mnt_list); 737 list_add_tail(&list, &tagged->mnt_list);
741 738
742 mutex_lock(&audit_filter_mutex); 739 mutex_lock(&audit_filter_mutex);
diff --git a/kernel/exit.c b/kernel/exit.c
index 32cbf2607cb0..abf9cf3b95c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
923 schedule(); 923 schedule();
924 } 924 }
925 925
926 exit_irq_thread();
927
926 exit_signals(tsk); /* sets PF_EXITING */ 928 exit_signals(tsk); /* sets PF_EXITING */
927 /* 929 /*
928 * tsk->flags are checked in the futex code to protect against 930 * tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 69bde7a22e9b..7d10c46cbb4e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
645 645
646 tsk->min_flt = tsk->maj_flt = 0; 646 tsk->min_flt = tsk->maj_flt = 0;
647 tsk->nvcsw = tsk->nivcsw = 0; 647 tsk->nvcsw = tsk->nivcsw = 0;
648#ifdef CONFIG_DETECT_HUNG_TASK
649 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
650#endif
648 651
649 tsk->mm = NULL; 652 tsk->mm = NULL;
650 tsk->active_mm = NULL; 653 tsk->active_mm = NULL;
@@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
797 sig->cputime_expires.virt_exp = cputime_zero; 800 sig->cputime_expires.virt_exp = cputime_zero;
798 sig->cputime_expires.sched_exp = 0; 801 sig->cputime_expires.sched_exp = 0;
799 802
803 if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
804 sig->cputime_expires.prof_exp =
805 secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
806 sig->cputimer.running = 1;
807 }
808
800 /* The timer lists. */ 809 /* The timer lists. */
801 INIT_LIST_HEAD(&sig->cpu_timers[0]); 810 INIT_LIST_HEAD(&sig->cpu_timers[0]);
802 INIT_LIST_HEAD(&sig->cpu_timers[1]); 811 INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
812 atomic_inc(&current->signal->live); 821 atomic_inc(&current->signal->live);
813 return 0; 822 return 0;
814 } 823 }
815 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
816
817 if (sig)
818 posix_cpu_timers_init_group(sig);
819 824
825 sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
820 tsk->signal = sig; 826 tsk->signal = sig;
821 if (!sig) 827 if (!sig)
822 return -ENOMEM; 828 return -ENOMEM;
@@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
856 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); 862 memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
857 task_unlock(current->group_leader); 863 task_unlock(current->group_leader);
858 864
865 posix_cpu_timers_init_group(sig);
866
859 acct_init_pacct(&sig->pacct); 867 acct_init_pacct(&sig->pacct);
860 868
861 tty_audit_fork(sig); 869 tty_audit_fork(sig);
@@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1032 1040
1033 p->default_timer_slack_ns = current->timer_slack_ns; 1041 p->default_timer_slack_ns = current->timer_slack_ns;
1034 1042
1035#ifdef CONFIG_DETECT_SOFTLOCKUP
1036 p->last_switch_count = 0;
1037 p->last_switch_timestamp = 0;
1038#endif
1039
1040 task_io_accounting_init(&p->ioac); 1043 task_io_accounting_init(&p->ioac);
1041 acct_clear_integrals(p); 1044 acct_clear_integrals(p);
1042 1045
diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a024bca2..eef8cd26b5e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -883,7 +883,12 @@ retry_private:
883out_unlock: 883out_unlock:
884 double_unlock_hb(hb1, hb2); 884 double_unlock_hb(hb1, hb2);
885 885
886 /* drop_futex_key_refs() must be called outside the spinlocks. */ 886 /*
887 * drop_futex_key_refs() must be called outside the spinlocks. During
888 * the requeue we moved futex_q's from the hash bucket at key1 to the
889 * one at key2 and updated their key pointer. We no longer need to
890 * hold the references to key1.
891 */
887 while (--drop_count >= 0) 892 while (--drop_count >= 0)
888 drop_futex_key_refs(&key1); 893 drop_futex_key_refs(&key1);
889 894
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
1/*
2 * Detect Hung Task
3 *
4 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
5 *
6 */
7
8#include <linux/mm.h>
9#include <linux/cpu.h>
10#include <linux/nmi.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/freezer.h>
14#include <linux/kthread.h>
15#include <linux/lockdep.h>
16#include <linux/module.h>
17#include <linux/sysctl.h>
18
19/*
20 * The number of tasks checked:
21 */
22unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
23
24/*
25 * Limit number of tasks checked in a batch.
26 *
27 * This value controls the preemptibility of khungtaskd since preemption
28 * is disabled during the critical section. It also controls the size of
29 * the RCU grace period. So it needs to be upper-bound.
30 */
31#define HUNG_TASK_BATCHING 1024
32
33/*
34 * Zero means infinite timeout - no checking done:
35 */
36unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
37
38unsigned long __read_mostly sysctl_hung_task_warnings = 10;
39
40static int __read_mostly did_panic;
41
42static struct task_struct *watchdog_task;
43
44/*
45 * Should we panic (and reboot, if panic_timeout= is set) when a
46 * hung task is detected:
47 */
48unsigned int __read_mostly sysctl_hung_task_panic =
49 CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
50
51static int __init hung_task_panic_setup(char *str)
52{
53 sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
54
55 return 1;
56}
57__setup("hung_task_panic=", hung_task_panic_setup);
58
59static int
60hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
61{
62 did_panic = 1;
63
64 return NOTIFY_DONE;
65}
66
67static struct notifier_block panic_block = {
68 .notifier_call = hung_task_panic,
69};
70
71static void check_hung_task(struct task_struct *t, unsigned long timeout)
72{
73 unsigned long switch_count = t->nvcsw + t->nivcsw;
74
75 /*
76 * Ensure the task is not frozen.
77 * Also, when a freshly created task is scheduled once, changes
78 * its state to TASK_UNINTERRUPTIBLE without having ever been
79 * switched out once, it musn't be checked.
80 */
81 if (unlikely(t->flags & PF_FROZEN || !switch_count))
82 return;
83
84 if (switch_count != t->last_switch_count) {
85 t->last_switch_count = switch_count;
86 return;
87 }
88 if (!sysctl_hung_task_warnings)
89 return;
90 sysctl_hung_task_warnings--;
91
92 /*
93 * Ok, the task did not get scheduled for more than 2 minutes,
94 * complain:
95 */
96 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
97 "%ld seconds.\n", t->comm, t->pid, timeout);
98 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
99 " disables this message.\n");
100 sched_show_task(t);
101 __debug_show_held_locks(t);
102
103 touch_nmi_watchdog();
104
105 if (sysctl_hung_task_panic)
106 panic("hung_task: blocked tasks");
107}
108
109/*
110 * To avoid extending the RCU grace period for an unbounded amount of time,
111 * periodically exit the critical section and enter a new one.
112 *
113 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
114 * exit the grace period. For classic RCU, a reschedule is required.
115 */
116static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
117{
118 get_task_struct(g);
119 get_task_struct(t);
120 rcu_read_unlock();
121 cond_resched();
122 rcu_read_lock();
123 put_task_struct(t);
124 put_task_struct(g);
125}
126
127/*
128 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
129 * a really long time (120 seconds). If that happens, print out
130 * a warning.
131 */
132static void check_hung_uninterruptible_tasks(unsigned long timeout)
133{
134 int max_count = sysctl_hung_task_check_count;
135 int batch_count = HUNG_TASK_BATCHING;
136 struct task_struct *g, *t;
137
138 /*
139 * If the system crashed already then all bets are off,
140 * do not report extra hung tasks:
141 */
142 if (test_taint(TAINT_DIE) || did_panic)
143 return;
144
145 rcu_read_lock();
146 do_each_thread(g, t) {
147 if (!--max_count)
148 goto unlock;
149 if (!--batch_count) {
150 batch_count = HUNG_TASK_BATCHING;
151 rcu_lock_break(g, t);
152 /* Exit if t or g was unhashed during refresh. */
153 if (t->state == TASK_DEAD || g->state == TASK_DEAD)
154 goto unlock;
155 }
156 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
157 if (t->state == TASK_UNINTERRUPTIBLE)
158 check_hung_task(t, timeout);
159 } while_each_thread(g, t);
160 unlock:
161 rcu_read_unlock();
162}
163
164static unsigned long timeout_jiffies(unsigned long timeout)
165{
166 /* timeout of 0 will disable the watchdog */
167 return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
168}
169
170/*
171 * Process updating of timeout sysctl
172 */
173int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
174 struct file *filp, void __user *buffer,
175 size_t *lenp, loff_t *ppos)
176{
177 int ret;
178
179 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
180
181 if (ret || !write)
182 goto out;
183
184 wake_up_process(watchdog_task);
185
186 out:
187 return ret;
188}
189
190/*
191 * kthread which checks for tasks stuck in D state
192 */
193static int watchdog(void *dummy)
194{
195 set_user_nice(current, 0);
196
197 for ( ; ; ) {
198 unsigned long timeout = sysctl_hung_task_timeout_secs;
199
200 while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
201 timeout = sysctl_hung_task_timeout_secs;
202
203 check_hung_uninterruptible_tasks(timeout);
204 }
205
206 return 0;
207}
208
209static int __init hung_task_init(void)
210{
211 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
212 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
213
214 return 0;
215}
216
217module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
26} 26}
27 27
28/** 28/**
29 * devm_request_irq - allocate an interrupt line for a managed device 29 * devm_request_threaded_irq - allocate an interrupt line for a managed device
30 * @dev: device to request interrupt for 30 * @dev: device to request interrupt for
31 * @irq: Interrupt line to allocate 31 * @irq: Interrupt line to allocate
32 * @handler: Function to be called when the IRQ occurs 32 * @handler: Function to be called when the IRQ occurs
33 * @thread_fn: function to be called in a threaded interrupt context. NULL
34 * for devices which handle everything in @handler
33 * @irqflags: Interrupt type flags 35 * @irqflags: Interrupt type flags
34 * @devname: An ascii name for the claiming device 36 * @devname: An ascii name for the claiming device
35 * @dev_id: A cookie passed back to the handler function 37 * @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
42 * If an IRQ allocated with this function needs to be freed 44 * If an IRQ allocated with this function needs to be freed
43 * separately, dev_free_irq() must be used. 45 * separately, dev_free_irq() must be used.
44 */ 46 */
45int devm_request_irq(struct device *dev, unsigned int irq, 47int devm_request_threaded_irq(struct device *dev, unsigned int irq,
46 irq_handler_t handler, unsigned long irqflags, 48 irq_handler_t handler, irq_handler_t thread_fn,
47 const char *devname, void *dev_id) 49 unsigned long irqflags, const char *devname,
50 void *dev_id)
48{ 51{
49 struct irq_devres *dr; 52 struct irq_devres *dr;
50 int rc; 53 int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
54 if (!dr) 57 if (!dr)
55 return -ENOMEM; 58 return -ENOMEM;
56 59
57 rc = request_irq(irq, handler, irqflags, devname, dev_id); 60 rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
61 dev_id);
58 if (rc) { 62 if (rc) {
59 devres_free(dr); 63 devres_free(dr);
60 return rc; 64 return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
66 70
67 return 0; 71 return 0;
68} 72}
69EXPORT_SYMBOL(devm_request_irq); 73EXPORT_SYMBOL(devm_request_threaded_irq);
70 74
71/** 75/**
72 * devm_free_irq - free an interrupt 76 * devm_free_irq - free an interrupt
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..d82142be8dd2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
339 return IRQ_NONE; 339 return IRQ_NONE;
340} 340}
341 341
342static void warn_no_thread(unsigned int irq, struct irqaction *action)
343{
344 if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
345 return;
346
347 printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
348 "but no thread function available.", irq, action->name);
349}
350
342DEFINE_TRACE(irq_handler_entry); 351DEFINE_TRACE(irq_handler_entry);
343DEFINE_TRACE(irq_handler_exit); 352DEFINE_TRACE(irq_handler_exit);
344 353
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
363 trace_irq_handler_entry(irq, action); 372 trace_irq_handler_entry(irq, action);
364 ret = action->handler(irq, action->dev_id); 373 ret = action->handler(irq, action->dev_id);
365 trace_irq_handler_exit(irq, action, ret); 374 trace_irq_handler_exit(irq, action, ret);
366 if (ret == IRQ_HANDLED) 375
376 switch (ret) {
377 case IRQ_WAKE_THREAD:
378 /*
379 * Set result to handled so the spurious check
380 * does not trigger.
381 */
382 ret = IRQ_HANDLED;
383
384 /*
385 * Catch drivers which return WAKE_THREAD but
386 * did not set up a thread function
387 */
388 if (unlikely(!action->thread_fn)) {
389 warn_no_thread(irq, action);
390 break;
391 }
392
393 /*
394 * Wake up the handler thread for this
395 * action. In case the thread crashed and was
396 * killed we just pretend that we handled the
397 * interrupt. The hardirq handler above has
398 * disabled the device interrupt, so no irq
399 * storm is lurking.
400 */
401 if (likely(!test_bit(IRQTF_DIED,
402 &action->thread_flags))) {
403 set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
404 wake_up_process(action->thread);
405 }
406
407 /* Fall through to add to randomness */
408 case IRQ_HANDLED:
367 status |= action->flags; 409 status |= action->flags;
410 break;
411
412 default:
413 break;
414 }
415
368 retval |= ret; 416 retval |= ret;
369 action = action->next; 417 action = action->next;
370 } while (action); 418 } while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..7e2e7dd4cd2f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
8 */ 8 */
9 9
10#include <linux/irq.h> 10#include <linux/irq.h>
11#include <linux/kthread.h>
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/random.h> 13#include <linux/random.h>
13#include <linux/interrupt.h> 14#include <linux/interrupt.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/sched.h>
15 17
16#include "internals.h" 18#include "internals.h"
17 19
18#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
19cpumask_var_t irq_default_affinity;
20
21/** 20/**
22 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
23 * @irq: interrupt number to wait for 22 * @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
53 52
54 /* Oops, that failed? */ 53 /* Oops, that failed? */
55 } while (status & IRQ_INPROGRESS); 54 } while (status & IRQ_INPROGRESS);
55
56 /*
57 * We made sure that no hardirq handler is running. Now verify
58 * that no threaded handlers are active.
59 */
60 wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
56} 61}
57EXPORT_SYMBOL(synchronize_irq); 62EXPORT_SYMBOL(synchronize_irq);
58 63
64#ifdef CONFIG_SMP
65cpumask_var_t irq_default_affinity;
66
59/** 67/**
60 * irq_can_set_affinity - Check if the affinity of a given irq can be set 68 * irq_can_set_affinity - Check if the affinity of a given irq can be set
61 * @irq: Interrupt to check 69 * @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
72 return 1; 80 return 1;
73} 81}
74 82
83static void
84irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
85{
86 struct irqaction *action = desc->action;
87
88 while (action) {
89 if (action->thread)
90 set_cpus_allowed_ptr(action->thread, cpumask);
91 action = action->next;
92 }
93}
94
75/** 95/**
76 * irq_set_affinity - Set the irq affinity of a given irq 96 * irq_set_affinity - Set the irq affinity of a given irq
77 * @irq: Interrupt to set affinity 97 * @irq: Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
100 cpumask_copy(desc->affinity, cpumask); 120 cpumask_copy(desc->affinity, cpumask);
101 desc->chip->set_affinity(irq, cpumask); 121 desc->chip->set_affinity(irq, cpumask);
102#endif 122#endif
123 irq_set_thread_affinity(desc, cpumask);
103 desc->status |= IRQ_AFFINITY_SET; 124 desc->status |= IRQ_AFFINITY_SET;
104 spin_unlock_irqrestore(&desc->lock, flags); 125 spin_unlock_irqrestore(&desc->lock, flags);
105 return 0; 126 return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
150 171
151 spin_lock_irqsave(&desc->lock, flags); 172 spin_lock_irqsave(&desc->lock, flags);
152 ret = setup_affinity(irq, desc); 173 ret = setup_affinity(irq, desc);
174 if (!ret)
175 irq_set_thread_affinity(desc, desc->affinity);
153 spin_unlock_irqrestore(&desc->lock, flags); 176 spin_unlock_irqrestore(&desc->lock, flags);
154 177
155 return ret; 178 return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
401 return ret; 424 return ret;
402} 425}
403 426
427static int irq_wait_for_interrupt(struct irqaction *action)
428{
429 while (!kthread_should_stop()) {
430 set_current_state(TASK_INTERRUPTIBLE);
431
432 if (test_and_clear_bit(IRQTF_RUNTHREAD,
433 &action->thread_flags)) {
434 __set_current_state(TASK_RUNNING);
435 return 0;
436 }
437 schedule();
438 }
439 return -1;
440}
441
442/*
443 * Interrupt handler thread
444 */
445static int irq_thread(void *data)
446{
447 struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
448 struct irqaction *action = data;
449 struct irq_desc *desc = irq_to_desc(action->irq);
450 int wake;
451
452 sched_setscheduler(current, SCHED_FIFO, &param);
453 current->irqaction = action;
454
455 while (!irq_wait_for_interrupt(action)) {
456
457 atomic_inc(&desc->threads_active);
458
459 spin_lock_irq(&desc->lock);
460 if (unlikely(desc->status & IRQ_DISABLED)) {
461 /*
462 * CHECKME: We might need a dedicated
463 * IRQ_THREAD_PENDING flag here, which
464 * retriggers the thread in check_irq_resend()
465 * but AFAICT IRQ_PENDING should be fine as it
466 * retriggers the interrupt itself --- tglx
467 */
468 desc->status |= IRQ_PENDING;
469 spin_unlock_irq(&desc->lock);
470 } else {
471 spin_unlock_irq(&desc->lock);
472
473 action->thread_fn(action->irq, action->dev_id);
474 }
475
476 wake = atomic_dec_and_test(&desc->threads_active);
477
478 if (wake && waitqueue_active(&desc->wait_for_threads))
479 wake_up(&desc->wait_for_threads);
480 }
481
482 /*
483 * Clear irqaction. Otherwise exit_irq_thread() would make
484 * fuzz about an active irq thread going into nirvana.
485 */
486 current->irqaction = NULL;
487 return 0;
488}
489
490/*
491 * Called from do_exit()
492 */
493void exit_irq_thread(void)
494{
495 struct task_struct *tsk = current;
496
497 if (!tsk->irqaction)
498 return;
499
500 printk(KERN_ERR
501 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
502 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
503
504 /*
505 * Set the THREAD DIED flag to prevent further wakeups of the
506 * soon to be gone threaded handler.
507 */
508 set_bit(IRQTF_DIED, &tsk->irqaction->flags);
509}
510
404/* 511/*
405 * Internal function to register an irqaction - typically used to 512 * Internal function to register an irqaction - typically used to
406 * allocate special interrupts that are part of the architecture. 513 * allocate special interrupts that are part of the architecture.
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
437 } 544 }
438 545
439 /* 546 /*
547 * Threaded handler ?
548 */
549 if (new->thread_fn) {
550 struct task_struct *t;
551
552 t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
553 new->name);
554 if (IS_ERR(t))
555 return PTR_ERR(t);
556 /*
557 * We keep the reference to the task struct even if
558 * the thread dies to avoid that the interrupt code
559 * references an already freed task_struct.
560 */
561 get_task_struct(t);
562 new->thread = t;
563 wake_up_process(t);
564 }
565
566 /*
440 * The following block of code has to be executed atomically 567 * The following block of code has to be executed atomically
441 */ 568 */
442 spin_lock_irqsave(&desc->lock, flags); 569 spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
473 if (!shared) { 600 if (!shared) {
474 irq_chip_set_defaults(desc->chip); 601 irq_chip_set_defaults(desc->chip);
475 602
603 init_waitqueue_head(&desc->wait_for_threads);
604
476 /* Setup the type (level, edge polarity) if configured: */ 605 /* Setup the type (level, edge polarity) if configured: */
477 if (new->flags & IRQF_TRIGGER_MASK) { 606 if (new->flags & IRQF_TRIGGER_MASK) {
478 ret = __irq_set_trigger(desc, irq, 607 ret = __irq_set_trigger(desc, irq,
479 new->flags & IRQF_TRIGGER_MASK); 608 new->flags & IRQF_TRIGGER_MASK);
480 609
481 if (ret) { 610 if (ret)
482 spin_unlock_irqrestore(&desc->lock, flags); 611 goto out_thread;
483 return ret;
484 }
485 } else 612 } else
486 compat_irq_chip_set_default_handler(desc); 613 compat_irq_chip_set_default_handler(desc);
487#if defined(CONFIG_IRQ_PER_CPU) 614#if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
549 dump_stack(); 676 dump_stack();
550 } 677 }
551#endif 678#endif
679 ret = -EBUSY;
680
681out_thread:
552 spin_unlock_irqrestore(&desc->lock, flags); 682 spin_unlock_irqrestore(&desc->lock, flags);
553 return -EBUSY; 683 if (new->thread) {
684 struct task_struct *t = new->thread;
685
686 new->thread = NULL;
687 if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
688 kthread_stop(t);
689 put_task_struct(t);
690 }
691 return ret;
554} 692}
555 693
556/** 694/**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
576{ 714{
577 struct irq_desc *desc = irq_to_desc(irq); 715 struct irq_desc *desc = irq_to_desc(irq);
578 struct irqaction *action, **action_ptr; 716 struct irqaction *action, **action_ptr;
717 struct task_struct *irqthread;
579 unsigned long flags; 718 unsigned long flags;
580 719
581 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 720 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
622 else 761 else
623 desc->chip->disable(irq); 762 desc->chip->disable(irq);
624 } 763 }
764
765 irqthread = action->thread;
766 action->thread = NULL;
767
625 spin_unlock_irqrestore(&desc->lock, flags); 768 spin_unlock_irqrestore(&desc->lock, flags);
626 769
627 unregister_handler_proc(irq, action); 770 unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
629 /* Make sure it's not being used on another CPU: */ 772 /* Make sure it's not being used on another CPU: */
630 synchronize_irq(irq); 773 synchronize_irq(irq);
631 774
775 if (irqthread) {
776 if (!test_bit(IRQTF_DIED, &action->thread_flags))
777 kthread_stop(irqthread);
778 put_task_struct(irqthread);
779 }
780
632#ifdef CONFIG_DEBUG_SHIRQ 781#ifdef CONFIG_DEBUG_SHIRQ
633 /* 782 /*
634 * It's a shared IRQ -- the driver ought to be prepared for an IRQ 783 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
681EXPORT_SYMBOL(free_irq); 830EXPORT_SYMBOL(free_irq);
682 831
683/** 832/**
684 * request_irq - allocate an interrupt line 833 * request_threaded_irq - allocate an interrupt line
685 * @irq: Interrupt line to allocate 834 * @irq: Interrupt line to allocate
686 * @handler: Function to be called when the IRQ occurs 835 * @handler: Function to be called when the IRQ occurs.
836 * Primary handler for threaded interrupts
837 * @thread_fn: Function called from the irq handler thread
838 * If NULL, no irq thread is created
687 * @irqflags: Interrupt type flags 839 * @irqflags: Interrupt type flags
688 * @devname: An ascii name for the claiming device 840 * @devname: An ascii name for the claiming device
689 * @dev_id: A cookie passed back to the handler function 841 * @dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
695 * raises, you must take care both to initialise your hardware 847 * raises, you must take care both to initialise your hardware
696 * and to set up the interrupt handler in the right order. 848 * and to set up the interrupt handler in the right order.
697 * 849 *
850 * If you want to set up a threaded irq handler for your device
851 * then you need to supply @handler and @thread_fn. @handler ist
852 * still called in hard interrupt context and has to check
853 * whether the interrupt originates from the device. If yes it
854 * needs to disable the interrupt on the device and return
855 * IRQ_THREAD_WAKE which will wake up the handler thread and run
856 * @thread_fn. This split handler design is necessary to support
857 * shared interrupts.
858 *
698 * Dev_id must be globally unique. Normally the address of the 859 * Dev_id must be globally unique. Normally the address of the
699 * device data structure is used as the cookie. Since the handler 860 * device data structure is used as the cookie. Since the handler
700 * receives this value it makes sense to use it. 861 * receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
710 * IRQF_TRIGGER_* Specify active edge(s) or level 871 * IRQF_TRIGGER_* Specify active edge(s) or level
711 * 872 *
712 */ 873 */
713int request_irq(unsigned int irq, irq_handler_t handler, 874int request_threaded_irq(unsigned int irq, irq_handler_t handler,
714 unsigned long irqflags, const char *devname, void *dev_id) 875 irq_handler_t thread_fn, unsigned long irqflags,
876 const char *devname, void *dev_id)
715{ 877{
716 struct irqaction *action; 878 struct irqaction *action;
717 struct irq_desc *desc; 879 struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
759 return -ENOMEM; 921 return -ENOMEM;
760 922
761 action->handler = handler; 923 action->handler = handler;
924 action->thread_fn = thread_fn;
762 action->flags = irqflags; 925 action->flags = irqflags;
763 action->name = devname; 926 action->name = devname;
764 action->dev_id = dev_id; 927 action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
788#endif 951#endif
789 return retval; 952 return retval;
790} 953}
791EXPORT_SYMBOL(request_irq); 954EXPORT_SYMBOL(request_threaded_irq);
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 243d6121e50e..44bbdcbaf8d2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
54static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) 54static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
55{ 55{
56 free_kstat_irqs(old_desc, desc); 56 free_kstat_irqs(old_desc, desc);
57 free_desc_masks(old_desc, desc);
57 arch_free_chip_data(old_desc, desc); 58 arch_free_chip_data(old_desc, desc);
58} 59}
59 60
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5016bfb682b9..a5e74ddee0e2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
68static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 68static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
69 69
70/* NOTE: change this value only with kprobe_mutex held */ 70/* NOTE: change this value only with kprobe_mutex held */
71static bool kprobe_enabled; 71static bool kprobes_all_disarmed;
72 72
73static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 73static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
328 struct kprobe *kp; 328 struct kprobe *kp;
329 329
330 list_for_each_entry_rcu(kp, &p->list, list) { 330 list_for_each_entry_rcu(kp, &p->list, list) {
331 if (kp->pre_handler && !kprobe_gone(kp)) { 331 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
332 set_kprobe_instance(kp); 332 set_kprobe_instance(kp);
333 if (kp->pre_handler(kp, regs)) 333 if (kp->pre_handler(kp, regs))
334 return 1; 334 return 1;
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
344 struct kprobe *kp; 344 struct kprobe *kp;
345 345
346 list_for_each_entry_rcu(kp, &p->list, list) { 346 list_for_each_entry_rcu(kp, &p->list, list) {
347 if (kp->post_handler && !kprobe_gone(kp)) { 347 if (kp->post_handler && likely(!kprobe_disabled(kp))) {
348 set_kprobe_instance(kp); 348 set_kprobe_instance(kp);
349 kp->post_handler(kp, regs, flags); 349 kp->post_handler(kp, regs, flags);
350 reset_kprobe_instance(); 350 reset_kprobe_instance();
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
518} 518}
519 519
520/* 520/*
521* Add the new probe to old_p->list. Fail if this is the 521* Add the new probe to ap->list. Fail if this is the
522* second jprobe at the address - two jprobes can't coexist 522* second jprobe at the address - two jprobes can't coexist
523*/ 523*/
524static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 524static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
525{ 525{
526 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
526 if (p->break_handler) { 527 if (p->break_handler) {
527 if (old_p->break_handler) 528 if (ap->break_handler)
528 return -EEXIST; 529 return -EEXIST;
529 list_add_tail_rcu(&p->list, &old_p->list); 530 list_add_tail_rcu(&p->list, &ap->list);
530 old_p->break_handler = aggr_break_handler; 531 ap->break_handler = aggr_break_handler;
531 } else 532 } else
532 list_add_rcu(&p->list, &old_p->list); 533 list_add_rcu(&p->list, &ap->list);
533 if (p->post_handler && !old_p->post_handler) 534 if (p->post_handler && !ap->post_handler)
534 old_p->post_handler = aggr_post_handler; 535 ap->post_handler = aggr_post_handler;
536
537 if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
538 ap->flags &= ~KPROBE_FLAG_DISABLED;
539 if (!kprobes_all_disarmed)
540 /* Arm the breakpoint again. */
541 arch_arm_kprobe(ap);
542 }
535 return 0; 543 return 0;
536} 544}
537 545
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
544 copy_kprobe(p, ap); 552 copy_kprobe(p, ap);
545 flush_insn_slot(ap); 553 flush_insn_slot(ap);
546 ap->addr = p->addr; 554 ap->addr = p->addr;
555 ap->flags = p->flags;
547 ap->pre_handler = aggr_pre_handler; 556 ap->pre_handler = aggr_pre_handler;
548 ap->fault_handler = aggr_fault_handler; 557 ap->fault_handler = aggr_fault_handler;
549 /* We don't care the kprobe which has gone. */ 558 /* We don't care the kprobe which has gone. */
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
566 struct kprobe *p) 575 struct kprobe *p)
567{ 576{
568 int ret = 0; 577 int ret = 0;
569 struct kprobe *ap; 578 struct kprobe *ap = old_p;
570 579
571 if (kprobe_gone(old_p)) { 580 if (old_p->pre_handler != aggr_pre_handler) {
581 /* If old_p is not an aggr_probe, create new aggr_kprobe. */
582 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
583 if (!ap)
584 return -ENOMEM;
585 add_aggr_kprobe(ap, old_p);
586 }
587
588 if (kprobe_gone(ap)) {
572 /* 589 /*
573 * Attempting to insert new probe at the same location that 590 * Attempting to insert new probe at the same location that
574 * had a probe in the module vaddr area which already 591 * had a probe in the module vaddr area which already
575 * freed. So, the instruction slot has already been 592 * freed. So, the instruction slot has already been
576 * released. We need a new slot for the new probe. 593 * released. We need a new slot for the new probe.
577 */ 594 */
578 ret = arch_prepare_kprobe(old_p); 595 ret = arch_prepare_kprobe(ap);
579 if (ret) 596 if (ret)
597 /*
598 * Even if fail to allocate new slot, don't need to
599 * free aggr_probe. It will be used next time, or
600 * freed by unregister_kprobe.
601 */
580 return ret; 602 return ret;
581 } 603
582 if (old_p->pre_handler == aggr_pre_handler) {
583 copy_kprobe(old_p, p);
584 ret = add_new_kprobe(old_p, p);
585 ap = old_p;
586 } else {
587 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
588 if (!ap) {
589 if (kprobe_gone(old_p))
590 arch_remove_kprobe(old_p);
591 return -ENOMEM;
592 }
593 add_aggr_kprobe(ap, old_p);
594 copy_kprobe(ap, p);
595 ret = add_new_kprobe(ap, p);
596 }
597 if (kprobe_gone(old_p)) {
598 /* 604 /*
599 * If the old_p has gone, its breakpoint has been disarmed. 605 * Clear gone flag to prevent allocating new slot again, and
600 * We have to arm it again after preparing real kprobes. 606 * set disabled flag because it is not armed yet.
601 */ 607 */
602 ap->flags &= ~KPROBE_FLAG_GONE; 608 ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
603 if (kprobe_enabled) 609 | KPROBE_FLAG_DISABLED;
604 arch_arm_kprobe(ap);
605 } 610 }
606 return ret; 611
612 copy_kprobe(ap, p);
613 return add_new_kprobe(ap, p);
614}
615
616/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
617static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
618{
619 struct kprobe *kp;
620
621 list_for_each_entry_rcu(kp, &p->list, list) {
622 if (!kprobe_disabled(kp))
623 /*
624 * There is an active probe on the list.
625 * We can't disable aggr_kprobe.
626 */
627 return 0;
628 }
629 p->flags |= KPROBE_FLAG_DISABLED;
630 return 1;
607} 631}
608 632
609static int __kprobes in_kprobes_functions(unsigned long addr) 633static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
664 return -EINVAL; 688 return -EINVAL;
665 } 689 }
666 690
667 p->flags = 0; 691 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
692 p->flags &= KPROBE_FLAG_DISABLED;
693
668 /* 694 /*
669 * Check if are we probing a module. 695 * Check if are we probing a module.
670 */ 696 */
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
709 hlist_add_head_rcu(&p->hlist, 735 hlist_add_head_rcu(&p->hlist,
710 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 736 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
711 737
712 if (kprobe_enabled) 738 if (!kprobes_all_disarmed && !kprobe_disabled(p))
713 arch_arm_kprobe(p); 739 arch_arm_kprobe(p);
714 740
715out_unlock_text: 741out_unlock_text:
@@ -722,26 +748,39 @@ out:
722 748
723 return ret; 749 return ret;
724} 750}
751EXPORT_SYMBOL_GPL(register_kprobe);
725 752
726/* 753/* Check passed kprobe is valid and return kprobe in kprobe_table. */
727 * Unregister a kprobe without a scheduler synchronization. 754static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
728 */
729static int __kprobes __unregister_kprobe_top(struct kprobe *p)
730{ 755{
731 struct kprobe *old_p, *list_p; 756 struct kprobe *old_p, *list_p;
732 757
733 old_p = get_kprobe(p->addr); 758 old_p = get_kprobe(p->addr);
734 if (unlikely(!old_p)) 759 if (unlikely(!old_p))
735 return -EINVAL; 760 return NULL;
736 761
737 if (p != old_p) { 762 if (p != old_p) {
738 list_for_each_entry_rcu(list_p, &old_p->list, list) 763 list_for_each_entry_rcu(list_p, &old_p->list, list)
739 if (list_p == p) 764 if (list_p == p)
740 /* kprobe p is a valid probe */ 765 /* kprobe p is a valid probe */
741 goto valid_p; 766 goto valid;
742 return -EINVAL; 767 return NULL;
743 } 768 }
744valid_p: 769valid:
770 return old_p;
771}
772
773/*
774 * Unregister a kprobe without a scheduler synchronization.
775 */
776static int __kprobes __unregister_kprobe_top(struct kprobe *p)
777{
778 struct kprobe *old_p, *list_p;
779
780 old_p = __get_valid_kprobe(p);
781 if (old_p == NULL)
782 return -EINVAL;
783
745 if (old_p == p || 784 if (old_p == p ||
746 (old_p->pre_handler == aggr_pre_handler && 785 (old_p->pre_handler == aggr_pre_handler &&
747 list_is_singular(&old_p->list))) { 786 list_is_singular(&old_p->list))) {
@@ -750,7 +789,7 @@ valid_p:
750 * enabled and not gone - otherwise, the breakpoint would 789 * enabled and not gone - otherwise, the breakpoint would
751 * already have been removed. We save on flushing icache. 790 * already have been removed. We save on flushing icache.
752 */ 791 */
753 if (kprobe_enabled && !kprobe_gone(old_p)) { 792 if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
754 mutex_lock(&text_mutex); 793 mutex_lock(&text_mutex);
755 arch_disarm_kprobe(p); 794 arch_disarm_kprobe(p);
756 mutex_unlock(&text_mutex); 795 mutex_unlock(&text_mutex);
@@ -768,6 +807,11 @@ valid_p:
768 } 807 }
769noclean: 808noclean:
770 list_del_rcu(&p->list); 809 list_del_rcu(&p->list);
810 if (!kprobe_disabled(old_p)) {
811 try_to_disable_aggr_kprobe(old_p);
812 if (!kprobes_all_disarmed && kprobe_disabled(old_p))
813 arch_disarm_kprobe(old_p);
814 }
771 } 815 }
772 return 0; 816 return 0;
773} 817}
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
803 } 847 }
804 return ret; 848 return ret;
805} 849}
850EXPORT_SYMBOL_GPL(register_kprobes);
806 851
807void __kprobes unregister_kprobe(struct kprobe *p) 852void __kprobes unregister_kprobe(struct kprobe *p)
808{ 853{
809 unregister_kprobes(&p, 1); 854 unregister_kprobes(&p, 1);
810} 855}
856EXPORT_SYMBOL_GPL(unregister_kprobe);
811 857
812void __kprobes unregister_kprobes(struct kprobe **kps, int num) 858void __kprobes unregister_kprobes(struct kprobe **kps, int num)
813{ 859{
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
826 if (kps[i]->addr) 872 if (kps[i]->addr)
827 __unregister_kprobe_bottom(kps[i]); 873 __unregister_kprobe_bottom(kps[i]);
828} 874}
875EXPORT_SYMBOL_GPL(unregister_kprobes);
829 876
830static struct notifier_block kprobe_exceptions_nb = { 877static struct notifier_block kprobe_exceptions_nb = {
831 .notifier_call = kprobe_exceptions_notify, 878 .notifier_call = kprobe_exceptions_notify,
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
865 } 912 }
866 return ret; 913 return ret;
867} 914}
915EXPORT_SYMBOL_GPL(register_jprobes);
868 916
869int __kprobes register_jprobe(struct jprobe *jp) 917int __kprobes register_jprobe(struct jprobe *jp)
870{ 918{
871 return register_jprobes(&jp, 1); 919 return register_jprobes(&jp, 1);
872} 920}
921EXPORT_SYMBOL_GPL(register_jprobe);
873 922
874void __kprobes unregister_jprobe(struct jprobe *jp) 923void __kprobes unregister_jprobe(struct jprobe *jp)
875{ 924{
876 unregister_jprobes(&jp, 1); 925 unregister_jprobes(&jp, 1);
877} 926}
927EXPORT_SYMBOL_GPL(unregister_jprobe);
878 928
879void __kprobes unregister_jprobes(struct jprobe **jps, int num) 929void __kprobes unregister_jprobes(struct jprobe **jps, int num)
880{ 930{
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
894 __unregister_kprobe_bottom(&jps[i]->kp); 944 __unregister_kprobe_bottom(&jps[i]->kp);
895 } 945 }
896} 946}
947EXPORT_SYMBOL_GPL(unregister_jprobes);
897 948
898#ifdef CONFIG_KRETPROBES 949#ifdef CONFIG_KRETPROBES
899/* 950/*
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
987 free_rp_inst(rp); 1038 free_rp_inst(rp);
988 return ret; 1039 return ret;
989} 1040}
1041EXPORT_SYMBOL_GPL(register_kretprobe);
990 1042
991int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1043int __kprobes register_kretprobes(struct kretprobe **rps, int num)
992{ 1044{
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1004 } 1056 }
1005 return ret; 1057 return ret;
1006} 1058}
1059EXPORT_SYMBOL_GPL(register_kretprobes);
1007 1060
1008void __kprobes unregister_kretprobe(struct kretprobe *rp) 1061void __kprobes unregister_kretprobe(struct kretprobe *rp)
1009{ 1062{
1010 unregister_kretprobes(&rp, 1); 1063 unregister_kretprobes(&rp, 1);
1011} 1064}
1065EXPORT_SYMBOL_GPL(unregister_kretprobe);
1012 1066
1013void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1067void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1014{ 1068{
@@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1030 } 1084 }
1031 } 1085 }
1032} 1086}
1087EXPORT_SYMBOL_GPL(unregister_kretprobes);
1033 1088
1034#else /* CONFIG_KRETPROBES */ 1089#else /* CONFIG_KRETPROBES */
1035int __kprobes register_kretprobe(struct kretprobe *rp) 1090int __kprobes register_kretprobe(struct kretprobe *rp)
1036{ 1091{
1037 return -ENOSYS; 1092 return -ENOSYS;
1038} 1093}
1094EXPORT_SYMBOL_GPL(register_kretprobe);
1039 1095
1040int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1096int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1041{ 1097{
1042 return -ENOSYS; 1098 return -ENOSYS;
1043} 1099}
1100EXPORT_SYMBOL_GPL(register_kretprobes);
1101
1044void __kprobes unregister_kretprobe(struct kretprobe *rp) 1102void __kprobes unregister_kretprobe(struct kretprobe *rp)
1045{ 1103{
1046} 1104}
1105EXPORT_SYMBOL_GPL(unregister_kretprobe);
1047 1106
1048void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1107void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1049{ 1108{
1050} 1109}
1110EXPORT_SYMBOL_GPL(unregister_kretprobes);
1051 1111
1052static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1112static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1053 struct pt_regs *regs) 1113 struct pt_regs *regs)
@@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1061static void __kprobes kill_kprobe(struct kprobe *p) 1121static void __kprobes kill_kprobe(struct kprobe *p)
1062{ 1122{
1063 struct kprobe *kp; 1123 struct kprobe *kp;
1124
1064 p->flags |= KPROBE_FLAG_GONE; 1125 p->flags |= KPROBE_FLAG_GONE;
1065 if (p->pre_handler == aggr_pre_handler) { 1126 if (p->pre_handler == aggr_pre_handler) {
1066 /* 1127 /*
@@ -1173,8 +1234,8 @@ static int __init init_kprobes(void)
1173 } 1234 }
1174 } 1235 }
1175 1236
1176 /* By default, kprobes are enabled */ 1237 /* By default, kprobes are armed */
1177 kprobe_enabled = true; 1238 kprobes_all_disarmed = false;
1178 1239
1179 err = arch_init_kprobes(); 1240 err = arch_init_kprobes();
1180 if (!err) 1241 if (!err)
@@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1202 else 1263 else
1203 kprobe_type = "k"; 1264 kprobe_type = "k";
1204 if (sym) 1265 if (sym)
1205 seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, 1266 seq_printf(pi, "%p %s %s+0x%x %s %s%s\n",
1206 sym, offset, (modname ? modname : " "), 1267 p->addr, kprobe_type, sym, offset,
1207 (kprobe_gone(p) ? "[GONE]" : "")); 1268 (modname ? modname : " "),
1269 (kprobe_gone(p) ? "[GONE]" : ""),
1270 ((kprobe_disabled(p) && !kprobe_gone(p)) ?
1271 "[DISABLED]" : ""));
1208 else 1272 else
1209 seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, 1273 seq_printf(pi, "%p %s %p %s%s\n",
1210 (kprobe_gone(p) ? "[GONE]" : "")); 1274 p->addr, kprobe_type, p->addr,
1275 (kprobe_gone(p) ? "[GONE]" : ""),
1276 ((kprobe_disabled(p) && !kprobe_gone(p)) ?
1277 "[DISABLED]" : ""));
1211} 1278}
1212 1279
1213static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 1280static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = {
1272 .release = seq_release, 1339 .release = seq_release,
1273}; 1340};
1274 1341
1275static void __kprobes enable_all_kprobes(void) 1342/* Disable one kprobe */
1343int __kprobes disable_kprobe(struct kprobe *kp)
1344{
1345 int ret = 0;
1346 struct kprobe *p;
1347
1348 mutex_lock(&kprobe_mutex);
1349
1350 /* Check whether specified probe is valid. */
1351 p = __get_valid_kprobe(kp);
1352 if (unlikely(p == NULL)) {
1353 ret = -EINVAL;
1354 goto out;
1355 }
1356
1357 /* If the probe is already disabled (or gone), just return */
1358 if (kprobe_disabled(kp))
1359 goto out;
1360
1361 kp->flags |= KPROBE_FLAG_DISABLED;
1362 if (p != kp)
1363 /* When kp != p, p is always enabled. */
1364 try_to_disable_aggr_kprobe(p);
1365
1366 if (!kprobes_all_disarmed && kprobe_disabled(p))
1367 arch_disarm_kprobe(p);
1368out:
1369 mutex_unlock(&kprobe_mutex);
1370 return ret;
1371}
1372EXPORT_SYMBOL_GPL(disable_kprobe);
1373
1374/* Enable one kprobe */
1375int __kprobes enable_kprobe(struct kprobe *kp)
1376{
1377 int ret = 0;
1378 struct kprobe *p;
1379
1380 mutex_lock(&kprobe_mutex);
1381
1382 /* Check whether specified probe is valid. */
1383 p = __get_valid_kprobe(kp);
1384 if (unlikely(p == NULL)) {
1385 ret = -EINVAL;
1386 goto out;
1387 }
1388
1389 if (kprobe_gone(kp)) {
1390 /* This kprobe has gone, we couldn't enable it. */
1391 ret = -EINVAL;
1392 goto out;
1393 }
1394
1395 if (!kprobes_all_disarmed && kprobe_disabled(p))
1396 arch_arm_kprobe(p);
1397
1398 p->flags &= ~KPROBE_FLAG_DISABLED;
1399 if (p != kp)
1400 kp->flags &= ~KPROBE_FLAG_DISABLED;
1401out:
1402 mutex_unlock(&kprobe_mutex);
1403 return ret;
1404}
1405EXPORT_SYMBOL_GPL(enable_kprobe);
1406
1407static void __kprobes arm_all_kprobes(void)
1276{ 1408{
1277 struct hlist_head *head; 1409 struct hlist_head *head;
1278 struct hlist_node *node; 1410 struct hlist_node *node;
@@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void)
1281 1413
1282 mutex_lock(&kprobe_mutex); 1414 mutex_lock(&kprobe_mutex);
1283 1415
1284 /* If kprobes are already enabled, just return */ 1416 /* If kprobes are armed, just return */
1285 if (kprobe_enabled) 1417 if (!kprobes_all_disarmed)
1286 goto already_enabled; 1418 goto already_enabled;
1287 1419
1288 mutex_lock(&text_mutex); 1420 mutex_lock(&text_mutex);
1289 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1421 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1290 head = &kprobe_table[i]; 1422 head = &kprobe_table[i];
1291 hlist_for_each_entry_rcu(p, node, head, hlist) 1423 hlist_for_each_entry_rcu(p, node, head, hlist)
1292 if (!kprobe_gone(p)) 1424 if (!kprobe_disabled(p))
1293 arch_arm_kprobe(p); 1425 arch_arm_kprobe(p);
1294 } 1426 }
1295 mutex_unlock(&text_mutex); 1427 mutex_unlock(&text_mutex);
1296 1428
1297 kprobe_enabled = true; 1429 kprobes_all_disarmed = false;
1298 printk(KERN_INFO "Kprobes globally enabled\n"); 1430 printk(KERN_INFO "Kprobes globally enabled\n");
1299 1431
1300already_enabled: 1432already_enabled:
@@ -1302,7 +1434,7 @@ already_enabled:
1302 return; 1434 return;
1303} 1435}
1304 1436
1305static void __kprobes disable_all_kprobes(void) 1437static void __kprobes disarm_all_kprobes(void)
1306{ 1438{
1307 struct hlist_head *head; 1439 struct hlist_head *head;
1308 struct hlist_node *node; 1440 struct hlist_node *node;
@@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void)
1311 1443
1312 mutex_lock(&kprobe_mutex); 1444 mutex_lock(&kprobe_mutex);
1313 1445
1314 /* If kprobes are already disabled, just return */ 1446 /* If kprobes are already disarmed, just return */
1315 if (!kprobe_enabled) 1447 if (kprobes_all_disarmed)
1316 goto already_disabled; 1448 goto already_disabled;
1317 1449
1318 kprobe_enabled = false; 1450 kprobes_all_disarmed = true;
1319 printk(KERN_INFO "Kprobes globally disabled\n"); 1451 printk(KERN_INFO "Kprobes globally disabled\n");
1320 mutex_lock(&text_mutex); 1452 mutex_lock(&text_mutex);
1321 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1453 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1322 head = &kprobe_table[i]; 1454 head = &kprobe_table[i];
1323 hlist_for_each_entry_rcu(p, node, head, hlist) { 1455 hlist_for_each_entry_rcu(p, node, head, hlist) {
1324 if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) 1456 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1325 arch_disarm_kprobe(p); 1457 arch_disarm_kprobe(p);
1326 } 1458 }
1327 } 1459 }
@@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file,
1347{ 1479{
1348 char buf[3]; 1480 char buf[3];
1349 1481
1350 if (kprobe_enabled) 1482 if (!kprobes_all_disarmed)
1351 buf[0] = '1'; 1483 buf[0] = '1';
1352 else 1484 else
1353 buf[0] = '0'; 1485 buf[0] = '0';
@@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file,
1370 case 'y': 1502 case 'y':
1371 case 'Y': 1503 case 'Y':
1372 case '1': 1504 case '1':
1373 enable_all_kprobes(); 1505 arm_all_kprobes();
1374 break; 1506 break;
1375 case 'n': 1507 case 'n':
1376 case 'N': 1508 case 'N':
1377 case '0': 1509 case '0':
1378 disable_all_kprobes(); 1510 disarm_all_kprobes();
1379 break; 1511 break;
1380 } 1512 }
1381 1513
@@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init);
1418 1550
1419module_init(init_kprobes); 1551module_init(init_kprobes);
1420 1552
1421EXPORT_SYMBOL_GPL(register_kprobe); 1553/* defined in arch/.../kernel/kprobes.c */
1422EXPORT_SYMBOL_GPL(unregister_kprobe);
1423EXPORT_SYMBOL_GPL(register_kprobes);
1424EXPORT_SYMBOL_GPL(unregister_kprobes);
1425EXPORT_SYMBOL_GPL(register_jprobe);
1426EXPORT_SYMBOL_GPL(unregister_jprobe);
1427EXPORT_SYMBOL_GPL(register_jprobes);
1428EXPORT_SYMBOL_GPL(unregister_jprobes);
1429EXPORT_SYMBOL_GPL(jprobe_return); 1554EXPORT_SYMBOL_GPL(jprobe_return);
1430EXPORT_SYMBOL_GPL(register_kretprobe);
1431EXPORT_SYMBOL_GPL(unregister_kretprobe);
1432EXPORT_SYMBOL_GPL(register_kretprobes);
1433EXPORT_SYMBOL_GPL(unregister_kretprobes);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 84bbadd4d021..4ebaf8519abf 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -76,6 +76,7 @@ static int kthread(void *_create)
76 76
77 /* OK, tell user we're spawned, wait for stop or wakeup */ 77 /* OK, tell user we're spawned, wait for stop or wakeup */
78 __set_current_state(TASK_UNINTERRUPTIBLE); 78 __set_current_state(TASK_UNINTERRUPTIBLE);
79 create->result = current;
79 complete(&create->started); 80 complete(&create->started);
80 schedule(); 81 schedule();
81 82
@@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create)
96 97
97 /* We want our own signal handler (we take no signals by default). */ 98 /* We want our own signal handler (we take no signals by default). */
98 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); 99 pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
99 if (pid < 0) { 100 if (pid < 0)
100 create->result = ERR_PTR(pid); 101 create->result = ERR_PTR(pid);
101 } else { 102 else
102 struct sched_param param = { .sched_priority = 0 };
103 wait_for_completion(&create->started); 103 wait_for_completion(&create->started);
104 read_lock(&tasklist_lock);
105 create->result = find_task_by_pid_ns(pid, &init_pid_ns);
106 read_unlock(&tasklist_lock);
107 /*
108 * root may have changed our (kthreadd's) priority or CPU mask.
109 * The kernel thread should not inherit these properties.
110 */
111 sched_setscheduler(create->result, SCHED_NORMAL, &param);
112 set_user_nice(create->result, KTHREAD_NICE_LEVEL);
113 set_cpus_allowed_ptr(create->result, cpu_all_mask);
114 }
115 complete(&create->done); 104 complete(&create->done);
116} 105}
117 106
@@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
154 wait_for_completion(&create.done); 143 wait_for_completion(&create.done);
155 144
156 if (!IS_ERR(create.result)) { 145 if (!IS_ERR(create.result)) {
146 struct sched_param param = { .sched_priority = 0 };
157 va_list args; 147 va_list args;
148
158 va_start(args, namefmt); 149 va_start(args, namefmt);
159 vsnprintf(create.result->comm, sizeof(create.result->comm), 150 vsnprintf(create.result->comm, sizeof(create.result->comm),
160 namefmt, args); 151 namefmt, args);
161 va_end(args); 152 va_end(args);
153 /*
154 * root may have changed our (kthreadd's) priority or CPU mask.
155 * The kernel thread should not inherit these properties.
156 */
157 sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
158 set_user_nice(create.result, KTHREAD_NICE_LEVEL);
159 set_cpus_allowed_ptr(create.result, cpu_all_mask);
162 } 160 }
163 return create.result; 161 return create.result;
164} 162}
diff --git a/kernel/module.c b/kernel/module.c
index c268a771595c..e797812a4d95 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod,
1952 if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) 1952 if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
1953 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; 1953 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
1954#endif 1954#endif
1955 /* Don't keep __versions around; it's just for loading. */
1956 if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
1957 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
1958 } 1955 }
1959 1956
1960 modindex = find_sec(hdr, sechdrs, secstrings, 1957 modindex = find_sec(hdr, sechdrs, secstrings,
@@ -2391,6 +2388,9 @@ SYSCALL_DEFINE3(init_module, void __user *, umod,
2391 blocking_notifier_call_chain(&module_notify_list, 2388 blocking_notifier_call_chain(&module_notify_list,
2392 MODULE_STATE_LIVE, mod); 2389 MODULE_STATE_LIVE, mod);
2393 2390
2391 /* We need to finish all async code before the module init sequence is done */
2392 async_synchronize_full();
2393
2394 mutex_lock(&module_mutex); 2394 mutex_lock(&module_mutex);
2395 /* Drop initial reference. */ 2395 /* Drop initial reference. */
2396 module_put(mod); 2396 module_put(mod);
diff --git a/kernel/mutex.c b/kernel/mutex.c
index 5d79781394a3..507cf2b5e9f1 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -148,7 +148,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
148 148
149 preempt_disable(); 149 preempt_disable();
150 mutex_acquire(&lock->dep_map, subclass, 0, ip); 150 mutex_acquire(&lock->dep_map, subclass, 0, ip);
151#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) 151#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) && \
152 !defined(CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES)
152 /* 153 /*
153 * Optimistic spinning. 154 * Optimistic spinning.
154 * 155 *
diff --git a/kernel/panic.c b/kernel/panic.c
index 3fd8c5bf8b39..934fb377f4b3 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -213,8 +213,16 @@ unsigned long get_taint(void)
213 213
214void add_taint(unsigned flag) 214void add_taint(unsigned flag)
215{ 215{
216 /* can't trust the integrity of the kernel anymore: */ 216 /*
217 debug_locks = 0; 217 * Can't trust the integrity of the kernel anymore.
218 * We don't call directly debug_locks_off() because the issue
219 * is not necessarily serious enough to set oops_in_progress to 1
220 * Also we want to keep up lockdep for staging development and
221 * post-warning case.
222 */
223 if (flag != TAINT_CRAP && flag != TAINT_WARN && __debug_locks_off())
224 printk(KERN_WARNING "Disabling lockdep due to kernel taint\n");
225
218 set_bit(flag, &tainted_mask); 226 set_bit(flag, &tainted_mask);
219} 227}
220EXPORT_SYMBOL(add_taint); 228EXPORT_SYMBOL(add_taint);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8e5d9a68b022..c9dcf98b4463 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
18 18
19 cputime = secs_to_cputime(rlim_new); 19 cputime = secs_to_cputime(rlim_new);
20 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || 20 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
21 cputime_lt(current->signal->it_prof_expires, cputime)) { 21 cputime_gt(current->signal->it_prof_expires, cputime)) {
22 spin_lock_irq(&current->sighand->siglock); 22 spin_lock_irq(&current->sighand->siglock);
23 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); 23 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
24 spin_unlock_irq(&current->sighand->siglock); 24 spin_unlock_irq(&current->sighand->siglock);
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
224 cpu->cpu = virt_ticks(p); 224 cpu->cpu = virt_ticks(p);
225 break; 225 break;
226 case CPUCLOCK_SCHED: 226 case CPUCLOCK_SCHED:
227 cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); 227 cpu->sched = task_sched_runtime(p);
228 break; 228 break;
229 } 229 }
230 return 0; 230 return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
305{ 305{
306 struct task_cputime cputime; 306 struct task_cputime cputime;
307 307
308 thread_group_cputime(p, &cputime);
309 switch (CPUCLOCK_WHICH(which_clock)) { 308 switch (CPUCLOCK_WHICH(which_clock)) {
310 default: 309 default:
311 return -EINVAL; 310 return -EINVAL;
312 case CPUCLOCK_PROF: 311 case CPUCLOCK_PROF:
312 thread_group_cputime(p, &cputime);
313 cpu->cpu = cputime_add(cputime.utime, cputime.stime); 313 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
314 break; 314 break;
315 case CPUCLOCK_VIRT: 315 case CPUCLOCK_VIRT:
316 thread_group_cputime(p, &cputime);
316 cpu->cpu = cputime.utime; 317 cpu->cpu = cputime.utime;
317 break; 318 break;
318 case CPUCLOCK_SCHED: 319 case CPUCLOCK_SCHED:
319 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); 320 cpu->sched = thread_group_sched_runtime(p);
320 break; 321 break;
321 } 322 }
322 return 0; 323 return 0;
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 5f21ab2bbcdf..0854770b63b9 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -22,6 +22,7 @@
22#include <linux/console.h> 22#include <linux/console.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <scsi/scsi_scan.h>
25#include <asm/suspend.h> 26#include <asm/suspend.h>
26 27
27#include "power.h" 28#include "power.h"
@@ -645,6 +646,13 @@ static int software_resume(void)
645 return 0; 646 return 0;
646 647
647 /* 648 /*
649 * We can't depend on SCSI devices being available after loading one of
650 * their modules if scsi_complete_async_scans() is not called and the
651 * resume device usually is a SCSI one.
652 */
653 scsi_complete_async_scans();
654
655 /*
648 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs 656 * name_to_dev_t() below takes a sysfs buffer mutex when sysfs
649 * is configured into the kernel. Since the regular hibernate 657 * is configured into the kernel. Since the regular hibernate
650 * trigger path is via sysfs which takes a buffer mutex before 658 * trigger path is via sysfs which takes a buffer mutex before
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f172f41858bb..f99ed6a75eac 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -291,20 +291,26 @@ static int suspend_enter(suspend_state_t state)
291 291
292 device_pm_lock(); 292 device_pm_lock();
293 293
294 if (suspend_ops->prepare) {
295 error = suspend_ops->prepare();
296 if (error)
297 goto Done;
298 }
299
294 error = device_power_down(PMSG_SUSPEND); 300 error = device_power_down(PMSG_SUSPEND);
295 if (error) { 301 if (error) {
296 printk(KERN_ERR "PM: Some devices failed to power down\n"); 302 printk(KERN_ERR "PM: Some devices failed to power down\n");
297 goto Done; 303 goto Platfrom_finish;
298 } 304 }
299 305
300 if (suspend_ops->prepare) { 306 if (suspend_ops->prepare_late) {
301 error = suspend_ops->prepare(); 307 error = suspend_ops->prepare_late();
302 if (error) 308 if (error)
303 goto Power_up_devices; 309 goto Power_up_devices;
304 } 310 }
305 311
306 if (suspend_test(TEST_PLATFORM)) 312 if (suspend_test(TEST_PLATFORM))
307 goto Platfrom_finish; 313 goto Platform_wake;
308 314
309 error = disable_nonboot_cpus(); 315 error = disable_nonboot_cpus();
310 if (error || suspend_test(TEST_CPUS)) 316 if (error || suspend_test(TEST_CPUS))
@@ -326,13 +332,17 @@ static int suspend_enter(suspend_state_t state)
326 Enable_cpus: 332 Enable_cpus:
327 enable_nonboot_cpus(); 333 enable_nonboot_cpus();
328 334
329 Platfrom_finish: 335 Platform_wake:
330 if (suspend_ops->finish) 336 if (suspend_ops->wake)
331 suspend_ops->finish(); 337 suspend_ops->wake();
332 338
333 Power_up_devices: 339 Power_up_devices:
334 device_power_up(PMSG_RESUME); 340 device_power_up(PMSG_RESUME);
335 341
342 Platfrom_finish:
343 if (suspend_ops->finish)
344 suspend_ops->finish();
345
336 Done: 346 Done:
337 device_pm_unlock(); 347 device_pm_unlock();
338 348
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index 505f319e489c..8ba052c86d48 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -64,8 +64,6 @@ static int submit(int rw, pgoff_t page_off, struct page *page,
64 struct bio *bio; 64 struct bio *bio;
65 65
66 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1); 66 bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
67 if (!bio)
68 return -ENOMEM;
69 bio->bi_sector = page_off * (PAGE_SIZE >> 9); 67 bio->bi_sector = page_off * (PAGE_SIZE >> 9);
70 bio->bi_bdev = resume_bdev; 68 bio->bi_bdev = resume_bdev;
71 bio->bi_end_io = end_swap_bio_read; 69 bio->bi_end_io = end_swap_bio_read;
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 6c85359364f2..ed97375daae9 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -24,6 +24,7 @@
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/freezer.h> 25#include <linux/freezer.h>
26#include <linux/smp_lock.h> 26#include <linux/smp_lock.h>
27#include <scsi/scsi_scan.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
@@ -92,6 +93,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
92 filp->private_data = data; 93 filp->private_data = data;
93 memset(&data->handle, 0, sizeof(struct snapshot_handle)); 94 memset(&data->handle, 0, sizeof(struct snapshot_handle));
94 if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { 95 if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
96 /* Hibernating. The image device should be accessible. */
95 data->swap = swsusp_resume_device ? 97 data->swap = swsusp_resume_device ?
96 swap_type_of(swsusp_resume_device, 0, NULL) : -1; 98 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
97 data->mode = O_RDONLY; 99 data->mode = O_RDONLY;
@@ -99,6 +101,13 @@ static int snapshot_open(struct inode *inode, struct file *filp)
99 if (error) 101 if (error)
100 pm_notifier_call_chain(PM_POST_HIBERNATION); 102 pm_notifier_call_chain(PM_POST_HIBERNATION);
101 } else { 103 } else {
104 /*
105 * Resuming. We may need to wait for the image device to
106 * appear.
107 */
108 wait_for_device_probe();
109 scsi_complete_async_scans();
110
102 data->swap = -1; 111 data->swap = -1;
103 data->mode = O_WRONLY; 112 data->mode = O_WRONLY;
104 error = pm_notifier_call_chain(PM_RESTORE_PREPARE); 113 error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 321127d965c2..4559e84f4b8a 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -21,9 +21,7 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <linux/syscalls.h> 23#include <linux/syscalls.h>
24 24#include <linux/uaccess.h>
25#include <asm/pgtable.h>
26#include <asm/uaccess.h>
27 25
28 26
29/* 27/*
@@ -38,7 +36,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
38 list_add(&child->ptrace_entry, &new_parent->ptraced); 36 list_add(&child->ptrace_entry, &new_parent->ptraced);
39 child->parent = new_parent; 37 child->parent = new_parent;
40} 38}
41 39
42/* 40/*
43 * Turn a tracing stop into a normal stop now, since with no tracer there 41 * Turn a tracing stop into a normal stop now, since with no tracer there
44 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a 42 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
@@ -163,7 +161,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
163 task_lock(task); 161 task_lock(task);
164 err = __ptrace_may_access(task, mode); 162 err = __ptrace_may_access(task, mode);
165 task_unlock(task); 163 task_unlock(task);
166 return (!err ? true : false); 164 return !err;
167} 165}
168 166
169int ptrace_attach(struct task_struct *task) 167int ptrace_attach(struct task_struct *task)
@@ -348,7 +346,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
348 copied += retval; 346 copied += retval;
349 src += retval; 347 src += retval;
350 dst += retval; 348 dst += retval;
351 len -= retval; 349 len -= retval;
352 } 350 }
353 return copied; 351 return copied;
354} 352}
@@ -373,7 +371,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
373 copied += retval; 371 copied += retval;
374 src += retval; 372 src += retval;
375 dst += retval; 373 dst += retval;
376 len -= retval; 374 len -= retval;
377 } 375 }
378 return copied; 376 return copied;
379} 377}
@@ -486,9 +484,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
486 if (unlikely(!arch_has_single_step())) 484 if (unlikely(!arch_has_single_step()))
487 return -EIO; 485 return -EIO;
488 user_enable_single_step(child); 486 user_enable_single_step(child);
489 } 487 } else {
490 else
491 user_disable_single_step(child); 488 user_disable_single_step(child);
489 }
492 490
493 child->exit_code = data; 491 child->exit_code = data;
494 wake_up_process(child); 492 wake_up_process(child);
@@ -596,10 +594,11 @@ repeat:
596 ret = security_ptrace_traceme(current->parent); 594 ret = security_ptrace_traceme(current->parent);
597 595
598 /* 596 /*
599 * Set the ptrace bit in the process ptrace flags. 597 * Check PF_EXITING to ensure ->real_parent has not passed
600 * Then link us on our parent's ptraced list. 598 * exit_ptrace(). Otherwise we don't report the error but
599 * pretend ->real_parent untraces us right after return.
601 */ 600 */
602 if (!ret) { 601 if (!ret && !(current->real_parent->flags & PF_EXITING)) {
603 current->ptrace |= PT_PTRACED; 602 current->ptrace |= PT_PTRACED;
604 __ptrace_link(current, current->real_parent); 603 __ptrace_link(current, current->real_parent);
605 } 604 }
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 2c7b8457d0d2..a967c9feb90a 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -58,6 +58,10 @@ static DEFINE_MUTEX(rcu_barrier_mutex);
58static struct completion rcu_barrier_completion; 58static struct completion rcu_barrier_completion;
59int rcu_scheduler_active __read_mostly; 59int rcu_scheduler_active __read_mostly;
60 60
61static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
62static struct rcu_head rcu_migrate_head[3];
63static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
64
61/* 65/*
62 * Awaken the corresponding synchronize_rcu() instance now that a 66 * Awaken the corresponding synchronize_rcu() instance now that a
63 * grace period has elapsed. 67 * grace period has elapsed.
@@ -122,7 +126,10 @@ static void rcu_barrier_func(void *type)
122 } 126 }
123} 127}
124 128
125static inline void wait_migrated_callbacks(void); 129static inline void wait_migrated_callbacks(void)
130{
131 wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
132}
126 133
127/* 134/*
128 * Orchestrate the specified type of RCU barrier, waiting for all 135 * Orchestrate the specified type of RCU barrier, waiting for all
@@ -179,21 +186,12 @@ void rcu_barrier_sched(void)
179} 186}
180EXPORT_SYMBOL_GPL(rcu_barrier_sched); 187EXPORT_SYMBOL_GPL(rcu_barrier_sched);
181 188
182static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
183static struct rcu_head rcu_migrate_head[3];
184static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
185
186static void rcu_migrate_callback(struct rcu_head *notused) 189static void rcu_migrate_callback(struct rcu_head *notused)
187{ 190{
188 if (atomic_dec_and_test(&rcu_migrate_type_count)) 191 if (atomic_dec_and_test(&rcu_migrate_type_count))
189 wake_up(&rcu_migrate_wq); 192 wake_up(&rcu_migrate_wq);
190} 193}
191 194
192static inline void wait_migrated_callbacks(void)
193{
194 wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
195}
196
197static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, 195static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
198 unsigned long action, void *hcpu) 196 unsigned long action, void *hcpu)
199{ 197{
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7f3266922572..d2a372fb0b9b 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -530,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
530 rdp->qs_pending = 1; 530 rdp->qs_pending = 1;
531 rdp->passed_quiesc = 0; 531 rdp->passed_quiesc = 0;
532 rdp->gpnum = rsp->gpnum; 532 rdp->gpnum = rsp->gpnum;
533 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
534 RCU_JIFFIES_TILL_FORCE_QS;
535} 533}
536 534
537/* 535/*
@@ -578,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
578 rsp->gpnum++; 576 rsp->gpnum++;
579 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 577 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
580 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 578 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
581 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
582 RCU_JIFFIES_TILL_FORCE_QS;
583 record_gp_stall_check_time(rsp); 579 record_gp_stall_check_time(rsp);
584 dyntick_record_completed(rsp, rsp->completed - 1); 580 dyntick_record_completed(rsp, rsp->completed - 1);
585 note_new_gpnum(rsp, rdp); 581 note_new_gpnum(rsp, rdp);
@@ -1055,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1055{ 1051{
1056 unsigned long flags; 1052 unsigned long flags;
1057 long lastcomp; 1053 long lastcomp;
1058 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
1059 struct rcu_node *rnp = rcu_get_root(rsp); 1054 struct rcu_node *rnp = rcu_get_root(rsp);
1060 u8 signaled; 1055 u8 signaled;
1061 1056
@@ -1066,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1066 return; /* Someone else is already on the job. */ 1061 return; /* Someone else is already on the job. */
1067 } 1062 }
1068 if (relaxed && 1063 if (relaxed &&
1069 (long)(rsp->jiffies_force_qs - jiffies) >= 0 && 1064 (long)(rsp->jiffies_force_qs - jiffies) >= 0)
1070 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
1071 goto unlock_ret; /* no emergency and done recently. */ 1065 goto unlock_ret; /* no emergency and done recently. */
1072 rsp->n_force_qs++; 1066 rsp->n_force_qs++;
1073 spin_lock(&rnp->lock); 1067 spin_lock(&rnp->lock);
1074 lastcomp = rsp->completed; 1068 lastcomp = rsp->completed;
1075 signaled = rsp->signaled; 1069 signaled = rsp->signaled;
1076 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1070 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1077 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
1078 RCU_JIFFIES_TILL_FORCE_QS;
1079 if (lastcomp == rsp->gpnum) { 1071 if (lastcomp == rsp->gpnum) {
1080 rsp->n_force_qs_ngp++; 1072 rsp->n_force_qs_ngp++;
1081 spin_unlock(&rnp->lock); 1073 spin_unlock(&rnp->lock);
@@ -1144,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1144 * If an RCU GP has gone long enough, go check for dyntick 1136 * If an RCU GP has gone long enough, go check for dyntick
1145 * idle CPUs and, if needed, send resched IPIs. 1137 * idle CPUs and, if needed, send resched IPIs.
1146 */ 1138 */
1147 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1139 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1148 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1149 force_quiescent_state(rsp, 1); 1140 force_quiescent_state(rsp, 1);
1150 1141
1151 /* 1142 /*
@@ -1230,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1230 if (unlikely(++rdp->qlen > qhimark)) { 1221 if (unlikely(++rdp->qlen > qhimark)) {
1231 rdp->blimit = LONG_MAX; 1222 rdp->blimit = LONG_MAX;
1232 force_quiescent_state(rsp, 0); 1223 force_quiescent_state(rsp, 0);
1233 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1224 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1234 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1235 force_quiescent_state(rsp, 1); 1225 force_quiescent_state(rsp, 1);
1236 local_irq_restore(flags); 1226 local_irq_restore(flags);
1237} 1227}
@@ -1290,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1290 1280
1291 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1281 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1292 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && 1282 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1293 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1283 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
1294 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
1295 return 1; 1284 return 1;
1296 1285
1297 /* nothing to do */ 1286 /* nothing to do */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4ee954f6a8d5..4b1875ba9404 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -49,14 +49,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
49{ 49{
50 if (!rdp->beenonline) 50 if (!rdp->beenonline)
51 return; 51 return;
52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", 52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
53 rdp->cpu, 53 rdp->cpu,
54 cpu_is_offline(rdp->cpu) ? '!' : ' ', 54 cpu_is_offline(rdp->cpu) ? '!' : ' ',
55 rdp->completed, rdp->gpnum, 55 rdp->completed, rdp->gpnum,
56 rdp->passed_quiesc, rdp->passed_quiesc_completed, 56 rdp->passed_quiesc, rdp->passed_quiesc_completed,
57 rdp->qs_pending, 57 rdp->qs_pending);
58 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
59 (int)(rdp->n_rcu_pending & 0xffff));
60#ifdef CONFIG_NO_HZ 58#ifdef CONFIG_NO_HZ
61 seq_printf(m, " dt=%d/%d dn=%d df=%lu", 59 seq_printf(m, " dt=%d/%d dn=%d df=%lu",
62 rdp->dynticks->dynticks, 60 rdp->dynticks->dynticks,
@@ -102,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
102{ 100{
103 if (!rdp->beenonline) 101 if (!rdp->beenonline)
104 return; 102 return;
105 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", 103 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
106 rdp->cpu, 104 rdp->cpu,
107 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", 105 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
108 rdp->completed, rdp->gpnum, 106 rdp->completed, rdp->gpnum,
109 rdp->passed_quiesc, rdp->passed_quiesc_completed, 107 rdp->passed_quiesc, rdp->passed_quiesc_completed,
110 rdp->qs_pending, 108 rdp->qs_pending);
111 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
112 rdp->n_rcu_pending);
113#ifdef CONFIG_NO_HZ 109#ifdef CONFIG_NO_HZ
114 seq_printf(m, ",%d,%d,%d,%lu", 110 seq_printf(m, ",%d,%d,%d,%lu",
115 rdp->dynticks->dynticks, 111 rdp->dynticks->dynticks,
@@ -123,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
123 119
124static int show_rcudata_csv(struct seq_file *m, void *unused) 120static int show_rcudata_csv(struct seq_file *m, void *unused)
125{ 121{
126 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); 122 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
127#ifdef CONFIG_NO_HZ 123#ifdef CONFIG_NO_HZ
128 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); 124 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
129#endif /* #ifdef CONFIG_NO_HZ */ 125#endif /* #ifdef CONFIG_NO_HZ */
diff --git a/kernel/resource.c b/kernel/resource.c
index fd5d7d574bb9..ac5f3a36923f 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -533,43 +533,21 @@ static void __init __reserve_region_with_split(struct resource *root,
533 res->end = end; 533 res->end = end;
534 res->flags = IORESOURCE_BUSY; 534 res->flags = IORESOURCE_BUSY;
535 535
536 for (;;) { 536 conflict = __request_resource(parent, res);
537 conflict = __request_resource(parent, res); 537 if (!conflict)
538 if (!conflict) 538 return;
539 break;
540 if (conflict != parent) {
541 parent = conflict;
542 if (!(conflict->flags & IORESOURCE_BUSY))
543 continue;
544 }
545
546 /* Uhhuh, that didn't work out.. */
547 kfree(res);
548 res = NULL;
549 break;
550 }
551
552 if (!res) {
553 /* failed, split and try again */
554
555 /* conflict covered whole area */
556 if (conflict->start <= start && conflict->end >= end)
557 return;
558 539
559 if (conflict->start > start) 540 /* failed, split and try again */
560 __reserve_region_with_split(root, start, conflict->start-1, name); 541 kfree(res);
561 if (!(conflict->flags & IORESOURCE_BUSY)) {
562 resource_size_t common_start, common_end;
563 542
564 common_start = max(conflict->start, start); 543 /* conflict covered whole area */
565 common_end = min(conflict->end, end); 544 if (conflict->start <= start && conflict->end >= end)
566 if (common_start < common_end) 545 return;
567 __reserve_region_with_split(root, common_start, common_end, name);
568 }
569 if (conflict->end < end)
570 __reserve_region_with_split(root, conflict->end+1, end, name);
571 }
572 546
547 if (conflict->start > start)
548 __reserve_region_with_split(root, start, conflict->start-1, name);
549 if (conflict->end < end)
550 __reserve_region_with_split(root, conflict->end+1, end, name);
573} 551}
574 552
575void __init reserve_region_with_split(struct resource *root, 553void __init reserve_region_with_split(struct resource *root,
diff --git a/kernel/sched.c b/kernel/sched.c
index f91bc8141dc3..36322e8682c7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
1418 struct rq_iterator *iterator); 1418 struct rq_iterator *iterator);
1419#endif 1419#endif
1420 1420
1421/* Time spent by the tasks of the cpu accounting group executing in ... */
1422enum cpuacct_stat_index {
1423 CPUACCT_STAT_USER, /* ... user mode */
1424 CPUACCT_STAT_SYSTEM, /* ... kernel mode */
1425
1426 CPUACCT_STAT_NSTATS,
1427};
1428
1421#ifdef CONFIG_CGROUP_CPUACCT 1429#ifdef CONFIG_CGROUP_CPUACCT
1422static void cpuacct_charge(struct task_struct *tsk, u64 cputime); 1430static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1431static void cpuacct_update_stats(struct task_struct *tsk,
1432 enum cpuacct_stat_index idx, cputime_t val);
1423#else 1433#else
1424static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} 1434static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1435static inline void cpuacct_update_stats(struct task_struct *tsk,
1436 enum cpuacct_stat_index idx, cputime_t val) {}
1425#endif 1437#endif
1426 1438
1427static inline void inc_cpu_load(struct rq *rq, unsigned long load) 1439static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4554,9 +4566,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
4554EXPORT_PER_CPU_SYMBOL(kstat); 4566EXPORT_PER_CPU_SYMBOL(kstat);
4555 4567
4556/* 4568/*
4557 * Return any ns on the sched_clock that have not yet been banked in 4569 * Return any ns on the sched_clock that have not yet been accounted in
4558 * @p in case that task is currently running. 4570 * @p in case that task is currently running.
4571 *
4572 * Called with task_rq_lock() held on @rq.
4559 */ 4573 */
4574static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
4575{
4576 u64 ns = 0;
4577
4578 if (task_current(rq, p)) {
4579 update_rq_clock(rq);
4580 ns = rq->clock - p->se.exec_start;
4581 if ((s64)ns < 0)
4582 ns = 0;
4583 }
4584
4585 return ns;
4586}
4587
4560unsigned long long task_delta_exec(struct task_struct *p) 4588unsigned long long task_delta_exec(struct task_struct *p)
4561{ 4589{
4562 unsigned long flags; 4590 unsigned long flags;
@@ -4564,16 +4592,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
4564 u64 ns = 0; 4592 u64 ns = 0;
4565 4593
4566 rq = task_rq_lock(p, &flags); 4594 rq = task_rq_lock(p, &flags);
4595 ns = do_task_delta_exec(p, rq);
4596 task_rq_unlock(rq, &flags);
4567 4597
4568 if (task_current(rq, p)) { 4598 return ns;
4569 u64 delta_exec; 4599}
4570 4600
4571 update_rq_clock(rq); 4601/*
4572 delta_exec = rq->clock - p->se.exec_start; 4602 * Return accounted runtime for the task.
4573 if ((s64)delta_exec > 0) 4603 * In case the task is currently running, return the runtime plus current's
4574 ns = delta_exec; 4604 * pending runtime that have not been accounted yet.
4575 } 4605 */
4606unsigned long long task_sched_runtime(struct task_struct *p)
4607{
4608 unsigned long flags;
4609 struct rq *rq;
4610 u64 ns = 0;
4611
4612 rq = task_rq_lock(p, &flags);
4613 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
4614 task_rq_unlock(rq, &flags);
4615
4616 return ns;
4617}
4618
4619/*
4620 * Return sum_exec_runtime for the thread group.
4621 * In case the task is currently running, return the sum plus current's
4622 * pending runtime that have not been accounted yet.
4623 *
4624 * Note that the thread group might have other running tasks as well,
4625 * so the return value not includes other pending runtime that other
4626 * running tasks might have.
4627 */
4628unsigned long long thread_group_sched_runtime(struct task_struct *p)
4629{
4630 struct task_cputime totals;
4631 unsigned long flags;
4632 struct rq *rq;
4633 u64 ns;
4576 4634
4635 rq = task_rq_lock(p, &flags);
4636 thread_group_cputime(p, &totals);
4637 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
4577 task_rq_unlock(rq, &flags); 4638 task_rq_unlock(rq, &flags);
4578 4639
4579 return ns; 4640 return ns;
@@ -4602,6 +4663,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
4602 cpustat->nice = cputime64_add(cpustat->nice, tmp); 4663 cpustat->nice = cputime64_add(cpustat->nice, tmp);
4603 else 4664 else
4604 cpustat->user = cputime64_add(cpustat->user, tmp); 4665 cpustat->user = cputime64_add(cpustat->user, tmp);
4666
4667 cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
4605 /* Account for user time used */ 4668 /* Account for user time used */
4606 acct_update_integrals(p); 4669 acct_update_integrals(p);
4607} 4670}
@@ -4663,6 +4726,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4663 else 4726 else
4664 cpustat->system = cputime64_add(cpustat->system, tmp); 4727 cpustat->system = cputime64_add(cpustat->system, tmp);
4665 4728
4729 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
4730
4666 /* Account for system time used */ 4731 /* Account for system time used */
4667 acct_update_integrals(p); 4732 acct_update_integrals(p);
4668} 4733}
@@ -4824,7 +4889,7 @@ void scheduler_tick(void)
4824#endif 4889#endif
4825} 4890}
4826 4891
4827unsigned long get_parent_ip(unsigned long addr) 4892notrace unsigned long get_parent_ip(unsigned long addr)
4828{ 4893{
4829 if (in_lock_functions(addr)) { 4894 if (in_lock_functions(addr)) {
4830 addr = CALLER_ADDR2; 4895 addr = CALLER_ADDR2;
@@ -7345,7 +7410,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7345 cpumask_or(groupmask, groupmask, sched_group_cpus(group)); 7410 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
7346 7411
7347 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7412 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7413
7348 printk(KERN_CONT " %s", str); 7414 printk(KERN_CONT " %s", str);
7415 if (group->__cpu_power != SCHED_LOAD_SCALE) {
7416 printk(KERN_CONT " (__cpu_power = %d)",
7417 group->__cpu_power);
7418 }
7349 7419
7350 group = group->next; 7420 group = group->next;
7351 } while (group != sd->groups); 7421 } while (group != sd->groups);
@@ -9968,6 +10038,7 @@ struct cpuacct {
9968 struct cgroup_subsys_state css; 10038 struct cgroup_subsys_state css;
9969 /* cpuusage holds pointer to a u64-type object on every cpu */ 10039 /* cpuusage holds pointer to a u64-type object on every cpu */
9970 u64 *cpuusage; 10040 u64 *cpuusage;
10041 struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
9971 struct cpuacct *parent; 10042 struct cpuacct *parent;
9972}; 10043};
9973 10044
@@ -9992,20 +10063,32 @@ static struct cgroup_subsys_state *cpuacct_create(
9992 struct cgroup_subsys *ss, struct cgroup *cgrp) 10063 struct cgroup_subsys *ss, struct cgroup *cgrp)
9993{ 10064{
9994 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 10065 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
10066 int i;
9995 10067
9996 if (!ca) 10068 if (!ca)
9997 return ERR_PTR(-ENOMEM); 10069 goto out;
9998 10070
9999 ca->cpuusage = alloc_percpu(u64); 10071 ca->cpuusage = alloc_percpu(u64);
10000 if (!ca->cpuusage) { 10072 if (!ca->cpuusage)
10001 kfree(ca); 10073 goto out_free_ca;
10002 return ERR_PTR(-ENOMEM); 10074
10003 } 10075 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10076 if (percpu_counter_init(&ca->cpustat[i], 0))
10077 goto out_free_counters;
10004 10078
10005 if (cgrp->parent) 10079 if (cgrp->parent)
10006 ca->parent = cgroup_ca(cgrp->parent); 10080 ca->parent = cgroup_ca(cgrp->parent);
10007 10081
10008 return &ca->css; 10082 return &ca->css;
10083
10084out_free_counters:
10085 while (--i >= 0)
10086 percpu_counter_destroy(&ca->cpustat[i]);
10087 free_percpu(ca->cpuusage);
10088out_free_ca:
10089 kfree(ca);
10090out:
10091 return ERR_PTR(-ENOMEM);
10009} 10092}
10010 10093
10011/* destroy an existing cpu accounting group */ 10094/* destroy an existing cpu accounting group */
@@ -10013,7 +10096,10 @@ static void
10013cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) 10096cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
10014{ 10097{
10015 struct cpuacct *ca = cgroup_ca(cgrp); 10098 struct cpuacct *ca = cgroup_ca(cgrp);
10099 int i;
10016 10100
10101 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10102 percpu_counter_destroy(&ca->cpustat[i]);
10017 free_percpu(ca->cpuusage); 10103 free_percpu(ca->cpuusage);
10018 kfree(ca); 10104 kfree(ca);
10019} 10105}
@@ -10100,6 +10186,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
10100 return 0; 10186 return 0;
10101} 10187}
10102 10188
10189static const char *cpuacct_stat_desc[] = {
10190 [CPUACCT_STAT_USER] = "user",
10191 [CPUACCT_STAT_SYSTEM] = "system",
10192};
10193
10194static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
10195 struct cgroup_map_cb *cb)
10196{
10197 struct cpuacct *ca = cgroup_ca(cgrp);
10198 int i;
10199
10200 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
10201 s64 val = percpu_counter_read(&ca->cpustat[i]);
10202 val = cputime64_to_clock_t(val);
10203 cb->fill(cb, cpuacct_stat_desc[i], val);
10204 }
10205 return 0;
10206}
10207
10103static struct cftype files[] = { 10208static struct cftype files[] = {
10104 { 10209 {
10105 .name = "usage", 10210 .name = "usage",
@@ -10110,7 +10215,10 @@ static struct cftype files[] = {
10110 .name = "usage_percpu", 10215 .name = "usage_percpu",
10111 .read_seq_string = cpuacct_percpu_seq_read, 10216 .read_seq_string = cpuacct_percpu_seq_read,
10112 }, 10217 },
10113 10218 {
10219 .name = "stat",
10220 .read_map = cpuacct_stats_show,
10221 },
10114}; 10222};
10115 10223
10116static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 10224static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10132,12 +10240,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
10132 return; 10240 return;
10133 10241
10134 cpu = task_cpu(tsk); 10242 cpu = task_cpu(tsk);
10243
10244 rcu_read_lock();
10245
10135 ca = task_ca(tsk); 10246 ca = task_ca(tsk);
10136 10247
10137 for (; ca; ca = ca->parent) { 10248 for (; ca; ca = ca->parent) {
10138 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 10249 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
10139 *cpuusage += cputime; 10250 *cpuusage += cputime;
10140 } 10251 }
10252
10253 rcu_read_unlock();
10254}
10255
10256/*
10257 * Charge the system/user time to the task's accounting group.
10258 */
10259static void cpuacct_update_stats(struct task_struct *tsk,
10260 enum cpuacct_stat_index idx, cputime_t val)
10261{
10262 struct cpuacct *ca;
10263
10264 if (unlikely(!cpuacct_subsys.active))
10265 return;
10266
10267 rcu_read_lock();
10268 ca = task_ca(tsk);
10269
10270 do {
10271 percpu_counter_add(&ca->cpustat[idx], val);
10272 ca = ca->parent;
10273 } while (ca);
10274 rcu_read_unlock();
10141} 10275}
10142 10276
10143struct cgroup_subsys cpuacct_subsys = { 10277struct cgroup_subsys cpuacct_subsys = {
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 1e00bfacf9b8..cdd3c89574cd 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
55 * cpupri_find - find the best (lowest-pri) CPU in the system 55 * cpupri_find - find the best (lowest-pri) CPU in the system
56 * @cp: The cpupri context 56 * @cp: The cpupri context
57 * @p: The task 57 * @p: The task
58 * @lowest_mask: A mask to fill in with selected CPUs 58 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
59 * 59 *
60 * Note: This function returns the recommended CPUs as calculated during the 60 * Note: This function returns the recommended CPUs as calculated during the
61 * current invokation. By the time the call returns, the CPUs may have in 61 * current invokation. By the time the call returns, the CPUs may have in
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 continue; 82 continue;
83 83
84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 84 if (lowest_mask)
85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
85 return 1; 86 return 1;
86 } 87 }
87 88
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 299d012b4394..f2c66f8f9712 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
948 948
949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
950{ 950{
951 cpumask_var_t mask;
952
953 if (rq->curr->rt.nr_cpus_allowed == 1) 951 if (rq->curr->rt.nr_cpus_allowed == 1)
954 return; 952 return;
955 953
956 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
957 return;
958
959 if (p->rt.nr_cpus_allowed != 1 954 if (p->rt.nr_cpus_allowed != 1
960 && cpupri_find(&rq->rd->cpupri, p, mask)) 955 && cpupri_find(&rq->rd->cpupri, p, NULL))
961 goto free; 956 return;
962 957
963 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) 958 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
964 goto free; 959 return;
965 960
966 /* 961 /*
967 * There appears to be other cpus that can accept 962 * There appears to be other cpus that can accept
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
970 */ 965 */
971 requeue_task_rt(rq, p, 1); 966 requeue_task_rt(rq, p, 1);
972 resched_task(rq->curr); 967 resched_task(rq->curr);
973free:
974 free_cpumask_var(mask);
975} 968}
976 969
977#endif /* CONFIG_SMP */ 970#endif /* CONFIG_SMP */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 2fecefacdc5b..b525dd348511 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -472,9 +472,9 @@ void tasklet_kill(struct tasklet_struct *t)
472 printk("Attempt to kill tasklet from interrupt\n"); 472 printk("Attempt to kill tasklet from interrupt\n");
473 473
474 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { 474 while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
475 do 475 do {
476 yield(); 476 yield();
477 while (test_bit(TASKLET_STATE_SCHED, &t->state)); 477 } while (test_bit(TASKLET_STATE_SCHED, &t->state));
478 } 478 }
479 tasklet_unlock_wait(t); 479 tasklet_unlock_wait(t);
480 clear_bit(TASKLET_STATE_SCHED, &t->state); 480 clear_bit(TASKLET_STATE_SCHED, &t->state);
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sys.c b/kernel/sys.c
index 51dbb55604e8..e7998cf31498 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -360,6 +360,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
360 void __user *, arg) 360 void __user *, arg)
361{ 361{
362 char buffer[256]; 362 char buffer[256];
363 int ret = 0;
363 364
364 /* We only trust the superuser with rebooting the system. */ 365 /* We only trust the superuser with rebooting the system. */
365 if (!capable(CAP_SYS_BOOT)) 366 if (!capable(CAP_SYS_BOOT))
@@ -397,7 +398,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
397 kernel_halt(); 398 kernel_halt();
398 unlock_kernel(); 399 unlock_kernel();
399 do_exit(0); 400 do_exit(0);
400 break; 401 panic("cannot halt");
401 402
402 case LINUX_REBOOT_CMD_POWER_OFF: 403 case LINUX_REBOOT_CMD_POWER_OFF:
403 kernel_power_off(); 404 kernel_power_off();
@@ -417,29 +418,22 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
417 418
418#ifdef CONFIG_KEXEC 419#ifdef CONFIG_KEXEC
419 case LINUX_REBOOT_CMD_KEXEC: 420 case LINUX_REBOOT_CMD_KEXEC:
420 { 421 ret = kernel_kexec();
421 int ret; 422 break;
422 ret = kernel_kexec();
423 unlock_kernel();
424 return ret;
425 }
426#endif 423#endif
427 424
428#ifdef CONFIG_HIBERNATION 425#ifdef CONFIG_HIBERNATION
429 case LINUX_REBOOT_CMD_SW_SUSPEND: 426 case LINUX_REBOOT_CMD_SW_SUSPEND:
430 { 427 ret = hibernate();
431 int ret = hibernate(); 428 break;
432 unlock_kernel();
433 return ret;
434 }
435#endif 429#endif
436 430
437 default: 431 default:
438 unlock_kernel(); 432 ret = -EINVAL;
439 return -EINVAL; 433 break;
440 } 434 }
441 unlock_kernel(); 435 unlock_kernel();
442 return 0; 436 return ret;
443} 437}
444 438
445static void deferred_cad(struct work_struct *dummy) 439static void deferred_cad(struct work_struct *dummy)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b125e3387568..e3d2c7dd59b9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1;
101static int __maybe_unused two = 2; 101static int __maybe_unused two = 2;
102static unsigned long one_ul = 1; 102static unsigned long one_ul = 1;
103static int one_hundred = 100; 103static int one_hundred = 100;
104static int one_thousand = 1000;
104 105
105/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
106static int maxolduid = 65535; 107static int maxolduid = 65535;
@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = {
813 .extra1 = &neg_one, 814 .extra1 = &neg_one,
814 .extra2 = &sixty, 815 .extra2 = &sixty,
815 }, 816 },
817#endif
818#ifdef CONFIG_DETECT_HUNG_TASK
819 {
820 .ctl_name = CTL_UNNUMBERED,
821 .procname = "hung_task_panic",
822 .data = &sysctl_hung_task_panic,
823 .maxlen = sizeof(int),
824 .mode = 0644,
825 .proc_handler = &proc_dointvec_minmax,
826 .strategy = &sysctl_intvec,
827 .extra1 = &zero,
828 .extra2 = &one,
829 },
816 { 830 {
817 .ctl_name = CTL_UNNUMBERED, 831 .ctl_name = CTL_UNNUMBERED,
818 .procname = "hung_task_check_count", 832 .procname = "hung_task_check_count",
@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = {
828 .data = &sysctl_hung_task_timeout_secs, 842 .data = &sysctl_hung_task_timeout_secs,
829 .maxlen = sizeof(unsigned long), 843 .maxlen = sizeof(unsigned long),
830 .mode = 0644, 844 .mode = 0644,
831 .proc_handler = &proc_doulongvec_minmax, 845 .proc_handler = &proc_dohung_task_timeout_secs,
832 .strategy = &sysctl_intvec, 846 .strategy = &sysctl_intvec,
833 }, 847 },
834 { 848 {
@@ -888,16 +902,6 @@ static struct ctl_table kern_table[] = {
888 .proc_handler = &proc_dointvec, 902 .proc_handler = &proc_dointvec,
889 }, 903 },
890#endif 904#endif
891#ifdef CONFIG_UNEVICTABLE_LRU
892 {
893 .ctl_name = CTL_UNNUMBERED,
894 .procname = "scan_unevictable_pages",
895 .data = &scan_unevictable_pages,
896 .maxlen = sizeof(scan_unevictable_pages),
897 .mode = 0644,
898 .proc_handler = &scan_unevictable_handler,
899 },
900#endif
901#ifdef CONFIG_SLOW_WORK 905#ifdef CONFIG_SLOW_WORK
902 { 906 {
903 .ctl_name = CTL_UNNUMBERED, 907 .ctl_name = CTL_UNNUMBERED,
@@ -1027,6 +1031,28 @@ static struct ctl_table vm_table[] = {
1027 .proc_handler = &proc_dointvec, 1031 .proc_handler = &proc_dointvec,
1028 }, 1032 },
1029 { 1033 {
1034 .ctl_name = CTL_UNNUMBERED,
1035 .procname = "nr_pdflush_threads_min",
1036 .data = &nr_pdflush_threads_min,
1037 .maxlen = sizeof nr_pdflush_threads_min,
1038 .mode = 0644 /* read-write */,
1039 .proc_handler = &proc_dointvec_minmax,
1040 .strategy = &sysctl_intvec,
1041 .extra1 = &one,
1042 .extra2 = &nr_pdflush_threads_max,
1043 },
1044 {
1045 .ctl_name = CTL_UNNUMBERED,
1046 .procname = "nr_pdflush_threads_max",
1047 .data = &nr_pdflush_threads_max,
1048 .maxlen = sizeof nr_pdflush_threads_max,
1049 .mode = 0644 /* read-write */,
1050 .proc_handler = &proc_dointvec_minmax,
1051 .strategy = &sysctl_intvec,
1052 .extra1 = &nr_pdflush_threads_min,
1053 .extra2 = &one_thousand,
1054 },
1055 {
1030 .ctl_name = VM_SWAPPINESS, 1056 .ctl_name = VM_SWAPPINESS,
1031 .procname = "swappiness", 1057 .procname = "swappiness",
1032 .data = &vm_swappiness, 1058 .data = &vm_swappiness,
@@ -1266,6 +1292,16 @@ static struct ctl_table vm_table[] = {
1266 .extra2 = &one, 1292 .extra2 = &one,
1267 }, 1293 },
1268#endif 1294#endif
1295#ifdef CONFIG_UNEVICTABLE_LRU
1296 {
1297 .ctl_name = CTL_UNNUMBERED,
1298 .procname = "scan_unevictable_pages",
1299 .data = &scan_unevictable_pages,
1300 .maxlen = sizeof(scan_unevictable_pages),
1301 .mode = 0644,
1302 .proc_handler = &scan_unevictable_handler,
1303 },
1304#endif
1269/* 1305/*
1270 * NOTE: do not add new entries to this table unless you have read 1306 * NOTE: do not add new entries to this table unless you have read
1271 * Documentation/sysctl/ctl_unnumbered.txt 1307 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index c46c931a7fe7..ecfd7b5187e0 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -181,12 +181,12 @@ static void clocksource_watchdog(unsigned long data)
181 181
182 resumed = test_and_clear_bit(0, &watchdog_resumed); 182 resumed = test_and_clear_bit(0, &watchdog_resumed);
183 183
184 wdnow = watchdog->read(); 184 wdnow = watchdog->read(watchdog);
185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask); 185 wd_nsec = cyc2ns(watchdog, (wdnow - watchdog_last) & watchdog->mask);
186 watchdog_last = wdnow; 186 watchdog_last = wdnow;
187 187
188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) { 188 list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
189 csnow = cs->read(); 189 csnow = cs->read(cs);
190 190
191 if (unlikely(resumed)) { 191 if (unlikely(resumed)) {
192 cs->wd_last = csnow; 192 cs->wd_last = csnow;
@@ -247,7 +247,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
247 247
248 list_add(&cs->wd_list, &watchdog_list); 248 list_add(&cs->wd_list, &watchdog_list);
249 if (!started && watchdog) { 249 if (!started && watchdog) {
250 watchdog_last = watchdog->read(); 250 watchdog_last = watchdog->read(watchdog);
251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL; 251 watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
252 add_timer_on(&watchdog_timer, 252 add_timer_on(&watchdog_timer,
253 cpumask_first(cpu_online_mask)); 253 cpumask_first(cpu_online_mask));
@@ -268,7 +268,7 @@ static void clocksource_check_watchdog(struct clocksource *cs)
268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG; 268 cse->flags &= ~CLOCK_SOURCE_WATCHDOG;
269 /* Start if list is not empty */ 269 /* Start if list is not empty */
270 if (!list_empty(&watchdog_list)) { 270 if (!list_empty(&watchdog_list)) {
271 watchdog_last = watchdog->read(); 271 watchdog_last = watchdog->read(watchdog);
272 watchdog_timer.expires = 272 watchdog_timer.expires =
273 jiffies + WATCHDOG_INTERVAL; 273 jiffies + WATCHDOG_INTERVAL;
274 add_timer_on(&watchdog_timer, 274 add_timer_on(&watchdog_timer,
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 06f197560f3b..c3f6c30816e3 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -50,7 +50,7 @@
50 */ 50 */
51#define JIFFIES_SHIFT 8 51#define JIFFIES_SHIFT 8
52 52
53static cycle_t jiffies_read(void) 53static cycle_t jiffies_read(struct clocksource *cs)
54{ 54{
55 return (cycle_t) jiffies; 55 return (cycle_t) jiffies;
56} 56}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 900f1b6598d1..687dff49f6e7 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(do_settimeofday);
182 */ 182 */
183static void change_clocksource(void) 183static void change_clocksource(void)
184{ 184{
185 struct clocksource *new; 185 struct clocksource *new, *old;
186 186
187 new = clocksource_get_next(); 187 new = clocksource_get_next();
188 188
@@ -191,11 +191,16 @@ static void change_clocksource(void)
191 191
192 clocksource_forward_now(); 192 clocksource_forward_now();
193 193
194 new->raw_time = clock->raw_time; 194 if (clocksource_enable(new))
195 return;
195 196
197 new->raw_time = clock->raw_time;
198 old = clock;
196 clock = new; 199 clock = new;
200 clocksource_disable(old);
201
197 clock->cycle_last = 0; 202 clock->cycle_last = 0;
198 clock->cycle_last = clocksource_read(new); 203 clock->cycle_last = clocksource_read(clock);
199 clock->error = 0; 204 clock->error = 0;
200 clock->xtime_nsec = 0; 205 clock->xtime_nsec = 0;
201 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 206 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
@@ -292,6 +297,7 @@ void __init timekeeping_init(void)
292 ntp_init(); 297 ntp_init();
293 298
294 clock = clocksource_get_next(); 299 clock = clocksource_get_next();
300 clocksource_enable(clock);
295 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH); 301 clocksource_calculate_interval(clock, NTP_INTERVAL_LENGTH);
296 clock->cycle_last = clocksource_read(clock); 302 clock->cycle_last = clocksource_read(clock);
297 303
diff --git a/kernel/timer.c b/kernel/timer.c
index b4555568b4e4..cffffad01c31 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer,
531} 531}
532 532
533/** 533/**
534 * init_timer - initialize a timer. 534 * init_timer_key - initialize a timer
535 * @timer: the timer to be initialized 535 * @timer: the timer to be initialized
536 * @name: name of the timer
537 * @key: lockdep class key of the fake lock used for tracking timer
538 * sync lock dependencies
536 * 539 *
537 * init_timer() must be done to a timer prior calling *any* of the 540 * init_timer_key() must be done to a timer prior calling *any* of the
538 * other timer functions. 541 * other timer functions.
539 */ 542 */
540void init_timer_key(struct timer_list *timer, 543void init_timer_key(struct timer_list *timer,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 2246141bda4d..417d1985e299 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -312,7 +312,7 @@ config KMEMTRACE
312 and profile kernel code. 312 and profile kernel code.
313 313
314 This requires an userspace application to use. See 314 This requires an userspace application to use. See
315 Documentation/vm/kmemtrace.txt for more information. 315 Documentation/trace/kmemtrace.txt for more information.
316 316
317 Saying Y will make the kernel somewhat larger and slower. However, 317 Saying Y will make the kernel somewhat larger and slower. However,
318 if you disable kmemtrace at run-time or boot-time, the performance 318 if you disable kmemtrace at run-time or boot-time, the performance
@@ -403,7 +403,7 @@ config MMIOTRACE
403 implementation and works via page faults. Tracing is disabled by 403 implementation and works via page faults. Tracing is disabled by
404 default and can be enabled at run-time. 404 default and can be enabled at run-time.
405 405
406 See Documentation/tracers/mmiotrace.txt. 406 See Documentation/trace/mmiotrace.txt.
407 If you are not helping to develop drivers, say N. 407 If you are not helping to develop drivers, say N.
408 408
409config MMIOTRACE_TEST 409config MMIOTRACE_TEST
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3f90c4..921ef5d1f0ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
327 char *msg; 327 char *msg;
328 struct blk_trace *bt; 328 struct blk_trace *bt;
329 329
330 if (count > BLK_TN_MAX_MSG) 330 if (count >= BLK_TN_MAX_MSG)
331 return -EINVAL; 331 return -EINVAL;
332 332
333 msg = kmalloc(count, GFP_KERNEL); 333 msg = kmalloc(count + 1, GFP_KERNEL);
334 if (msg == NULL) 334 if (msg == NULL)
335 return -ENOMEM; 335 return -ENOMEM;
336 336
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
339 return -EFAULT; 339 return -EFAULT;
340 } 340 }
341 341
342 msg[count] = '\0';
342 bt = filp->private_data; 343 bt = filp->private_data;
343 __trace_note_message(bt, "%s", msg); 344 __trace_note_message(bt, "%s", msg);
344 kfree(msg); 345 kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
642 if (blk_pc_request(rq)) { 643 if (blk_pc_request(rq)) {
643 what |= BLK_TC_ACT(BLK_TC_PC); 644 what |= BLK_TC_ACT(BLK_TC_PC);
644 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, 645 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
645 sizeof(rq->cmd), rq->cmd); 646 rq->cmd_len, rq->cmd);
646 } else { 647 } else {
647 what |= BLK_TC_ACT(BLK_TC_FS); 648 what |= BLK_TC_ACT(BLK_TC_FS);
648 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, 649 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
@@ -1376,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
1376{ 1377{
1377 int i; 1378 int i;
1378 int mask = 0; 1379 int mask = 0;
1379 char *s, *token; 1380 char *buf, *s, *token;
1380 1381
1381 s = kstrdup(str, GFP_KERNEL); 1382 buf = kstrdup(str, GFP_KERNEL);
1382 if (s == NULL) 1383 if (buf == NULL)
1383 return -ENOMEM; 1384 return -ENOMEM;
1384 s = strstrip(s); 1385 s = strstrip(buf);
1385 1386
1386 while (1) { 1387 while (1) {
1387 token = strsep(&s, ","); 1388 token = strsep(&s, ",");
@@ -1402,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
1402 break; 1403 break;
1403 } 1404 }
1404 } 1405 }
1405 kfree(s); 1406 kfree(buf);
1406 1407
1407 return mask; 1408 return mask;
1408} 1409}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a40c563..1ce5dc6372b8 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h> 31#include <linux/splice.h>
32#include <linux/kdebug.h> 32#include <linux/kdebug.h>
33#include <linux/string.h>
33#include <linux/ctype.h> 34#include <linux/ctype.h>
34#include <linux/init.h> 35#include <linux/init.h>
35#include <linux/poll.h> 36#include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
147} 148}
148__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 149__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
149 150
150long 151unsigned long long ns2usecs(cycle_t nsec)
151ns2usecs(cycle_t nsec)
152{ 152{
153 nsec += 500; 153 nsec += 500;
154 do_div(nsec, 1000); 154 do_div(nsec, 1000);
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1632 return; 1632 return;
1633 1633
1634 cpumask_set_cpu(iter->cpu, iter->started); 1634 cpumask_set_cpu(iter->cpu, iter->started);
1635 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1635
1636 /* Don't print started cpu buffer for the first entry of the trace */
1637 if (iter->idx > 1)
1638 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
1639 iter->cpu);
1636} 1640}
1637 1641
1638static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1642static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
1867 if (current_trace) 1871 if (current_trace)
1868 *iter->trace = *current_trace; 1872 *iter->trace = *current_trace;
1869 1873
1874 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
1875 goto fail;
1876
1877 cpumask_clear(iter->started);
1878
1870 if (current_trace && current_trace->print_max) 1879 if (current_trace && current_trace->print_max)
1871 iter->tr = &max_tr; 1880 iter->tr = &max_tr;
1872 else 1881 else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
1917 if (iter->buffer_iter[cpu]) 1926 if (iter->buffer_iter[cpu])
1918 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1927 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1919 } 1928 }
1929 free_cpumask_var(iter->started);
1920 fail: 1930 fail:
1921 mutex_unlock(&trace_types_lock); 1931 mutex_unlock(&trace_types_lock);
1922 kfree(iter->trace); 1932 kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
1960 1970
1961 seq_release(inode, file); 1971 seq_release(inode, file);
1962 mutex_destroy(&iter->mutex); 1972 mutex_destroy(&iter->mutex);
1973 free_cpumask_var(iter->started);
1963 kfree(iter->trace); 1974 kfree(iter->trace);
1964 kfree(iter); 1975 kfree(iter);
1965 return 0; 1976 return 0;
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
2358 "# mkdir /debug\n" 2369 "# mkdir /debug\n"
2359 "# mount -t debugfs nodev /debug\n\n" 2370 "# mount -t debugfs nodev /debug\n\n"
2360 "# cat /debug/tracing/available_tracers\n" 2371 "# cat /debug/tracing/available_tracers\n"
2361 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" 2372 "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2362 "# cat /debug/tracing/current_tracer\n" 2373 "# cat /debug/tracing/current_tracer\n"
2363 "none\n" 2374 "nop\n"
2364 "# echo sched_switch > /debug/tracing/current_tracer\n" 2375 "# echo sched_switch > /debug/tracing/current_tracer\n"
2365 "# cat /debug/tracing/current_tracer\n" 2376 "# cat /debug/tracing/current_tracer\n"
2366 "sched_switch\n" 2377 "sched_switch\n"
@@ -3266,19 +3277,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
3266 3277
3267 info->tr = &global_trace; 3278 info->tr = &global_trace;
3268 info->cpu = cpu; 3279 info->cpu = cpu;
3269 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3280 info->spare = NULL;
3270 /* Force reading ring buffer for first read */ 3281 /* Force reading ring buffer for first read */
3271 info->read = (unsigned int)-1; 3282 info->read = (unsigned int)-1;
3272 if (!info->spare)
3273 goto out;
3274 3283
3275 filp->private_data = info; 3284 filp->private_data = info;
3276 3285
3277 return 0; 3286 return nonseekable_open(inode, filp);
3278
3279 out:
3280 kfree(info);
3281 return -ENOMEM;
3282} 3287}
3283 3288
3284static ssize_t 3289static ssize_t
@@ -3293,6 +3298,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3293 if (!count) 3298 if (!count)
3294 return 0; 3299 return 0;
3295 3300
3301 if (!info->spare)
3302 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3303 if (!info->spare)
3304 return -ENOMEM;
3305
3296 /* Do we have previous read data to read? */ 3306 /* Do we have previous read data to read? */
3297 if (info->read < PAGE_SIZE) 3307 if (info->read < PAGE_SIZE)
3298 goto read; 3308 goto read;
@@ -3331,7 +3341,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
3331{ 3341{
3332 struct ftrace_buffer_info *info = file->private_data; 3342 struct ftrace_buffer_info *info = file->private_data;
3333 3343
3334 ring_buffer_free_read_page(info->tr->buffer, info->spare); 3344 if (info->spare)
3345 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3335 kfree(info); 3346 kfree(info);
3336 3347
3337 return 0; 3348 return 0;
@@ -3417,14 +3428,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3417 int size, i; 3428 int size, i;
3418 size_t ret; 3429 size_t ret;
3419 3430
3420 /* 3431 if (*ppos & (PAGE_SIZE - 1)) {
3421 * We can't seek on a buffer input 3432 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3422 */ 3433 return -EINVAL;
3423 if (unlikely(*ppos)) 3434 }
3424 return -ESPIPE;
3425 3435
3436 if (len & (PAGE_SIZE - 1)) {
3437 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3438 if (len < PAGE_SIZE)
3439 return -EINVAL;
3440 len &= PAGE_MASK;
3441 }
3426 3442
3427 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { 3443 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
3428 struct page *page; 3444 struct page *page;
3429 int r; 3445 int r;
3430 3446
@@ -3463,6 +3479,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3463 spd.partial[i].offset = 0; 3479 spd.partial[i].offset = 0;
3464 spd.partial[i].private = (unsigned long)ref; 3480 spd.partial[i].private = (unsigned long)ref;
3465 spd.nr_pages++; 3481 spd.nr_pages++;
3482 *ppos += PAGE_SIZE;
3466 } 3483 }
3467 3484
3468 spd.nr_pages = i; 3485 spd.nr_pages = i;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9e15802cca9f..7c3f49aad6e2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -604,7 +604,7 @@ extern int trace_selftest_startup_hw_branches(struct tracer *trace,
604#endif /* CONFIG_FTRACE_STARTUP_TEST */ 604#endif /* CONFIG_FTRACE_STARTUP_TEST */
605 605
606extern void *head_page(struct trace_array_cpu *data); 606extern void *head_page(struct trace_array_cpu *data);
607extern long ns2usecs(cycle_t nsec); 607extern unsigned long long ns2usecs(cycle_t nsec);
608extern int 608extern int
609trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 609trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
610extern int 610extern int
diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c
index ad8c22efff41..8333715e4066 100644
--- a/kernel/trace/trace_branch.c
+++ b/kernel/trace/trace_branch.c
@@ -155,6 +155,13 @@ static enum print_line_t trace_branch_print(struct trace_iterator *iter,
155 return TRACE_TYPE_HANDLED; 155 return TRACE_TYPE_HANDLED;
156} 156}
157 157
158static void branch_print_header(struct seq_file *s)
159{
160 seq_puts(s, "# TASK-PID CPU# TIMESTAMP CORRECT"
161 " FUNC:FILE:LINE\n");
162 seq_puts(s, "# | | | | | "
163 " |\n");
164}
158 165
159static struct trace_event trace_branch_event = { 166static struct trace_event trace_branch_event = {
160 .type = TRACE_BRANCH, 167 .type = TRACE_BRANCH,
@@ -169,6 +176,7 @@ static struct tracer branch_trace __read_mostly =
169#ifdef CONFIG_FTRACE_SELFTEST 176#ifdef CONFIG_FTRACE_SELFTEST
170 .selftest = trace_selftest_startup_branch, 177 .selftest = trace_selftest_startup_branch,
171#endif /* CONFIG_FTRACE_SELFTEST */ 178#endif /* CONFIG_FTRACE_SELFTEST */
179 .print_header = branch_print_header,
172}; 180};
173 181
174__init static int init_branch_tracer(void) 182__init static int init_branch_tracer(void)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 64ec4d278ffb..576f4fa2af0d 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -503,6 +503,7 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
503 503
504 if (copy_from_user(&buf, ubuf, cnt)) 504 if (copy_from_user(&buf, ubuf, cnt))
505 return -EFAULT; 505 return -EFAULT;
506 buf[cnt] = '\0';
506 507
507 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 508 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
508 if (!pred) 509 if (!pred)
@@ -520,9 +521,10 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
520 return cnt; 521 return cnt;
521 } 522 }
522 523
523 if (filter_add_pred(call, pred)) { 524 err = filter_add_pred(call, pred);
525 if (err < 0) {
524 filter_free_pred(pred); 526 filter_free_pred(pred);
525 return -EINVAL; 527 return err;
526 } 528 }
527 529
528 *ppos += cnt; 530 *ppos += cnt;
@@ -569,6 +571,7 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
569 571
570 if (copy_from_user(&buf, ubuf, cnt)) 572 if (copy_from_user(&buf, ubuf, cnt))
571 return -EFAULT; 573 return -EFAULT;
574 buf[cnt] = '\0';
572 575
573 pred = kzalloc(sizeof(*pred), GFP_KERNEL); 576 pred = kzalloc(sizeof(*pred), GFP_KERNEL);
574 if (!pred) 577 if (!pred)
@@ -586,10 +589,11 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
586 return cnt; 589 return cnt;
587 } 590 }
588 591
589 if (filter_add_subsystem_pred(system, pred)) { 592 err = filter_add_subsystem_pred(system, pred);
593 if (err < 0) {
590 filter_free_subsystem_preds(system); 594 filter_free_subsystem_preds(system);
591 filter_free_pred(pred); 595 filter_free_pred(pred);
592 return -EINVAL; 596 return err;
593 } 597 }
594 598
595 *ppos += cnt; 599 *ppos += cnt;
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 026be412f356..e03cbf1e38f3 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -215,7 +215,7 @@ static int __filter_add_pred(struct ftrace_event_call *call,
215 } 215 }
216 } 216 }
217 217
218 return -ENOMEM; 218 return -ENOSPC;
219} 219}
220 220
221static int is_string_field(const char *type) 221static int is_string_field(const char *type)
@@ -319,7 +319,7 @@ int filter_add_subsystem_pred(struct event_subsystem *system,
319 } 319 }
320 320
321 if (i == MAX_FILTER_PRED) 321 if (i == MAX_FILTER_PRED)
322 return -EINVAL; 322 return -ENOSPC;
323 323
324 events_for_each(call) { 324 events_for_each(call) {
325 int err; 325 int err;
@@ -410,16 +410,22 @@ int filter_parse(char **pbuf, struct filter_pred *pred)
410 } 410 }
411 } 411 }
412 412
413 if (!val_str) {
414 pred->field_name = NULL;
415 return -EINVAL;
416 }
417
413 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); 418 pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
414 if (!pred->field_name) 419 if (!pred->field_name)
415 return -ENOMEM; 420 return -ENOMEM;
416 421
417 pred->val = simple_strtoull(val_str, &tmp, 10); 422 pred->val = simple_strtoull(val_str, &tmp, 0);
418 if (tmp == val_str) { 423 if (tmp == val_str) {
419 pred->str_val = kstrdup(val_str, GFP_KERNEL); 424 pred->str_val = kstrdup(val_str, GFP_KERNEL);
420 if (!pred->str_val) 425 if (!pred->str_val)
421 return -ENOMEM; 426 return -ENOMEM;
422 } 427 } else if (*tmp != '\0')
428 return -EINVAL;
423 429
424 return 0; 430 return 0;
425} 431}
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
index 30743f7d4110..d363c6672c6c 100644
--- a/kernel/trace/trace_events_stage_2.h
+++ b/kernel/trace/trace_events_stage_2.h
@@ -105,10 +105,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \
105 return 0; 105 return 0;
106 106
107#undef __entry 107#undef __entry
108#define __entry "REC" 108#define __entry REC
109 109
110#undef TP_printk 110#undef TP_printk
111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args 111#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
112 112
113#undef TP_fast_assign 113#undef TP_fast_assign
114#define TP_fast_assign(args...) args 114#define TP_fast_assign(args...) args
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d3df50..07a22c33ebf3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
40 40
41#undef TRACE_FIELD_ZERO_CHAR 41#undef TRACE_FIELD_ZERO_CHAR
42#define TRACE_FIELD_ZERO_CHAR(item) \ 42#define TRACE_FIELD_ZERO_CHAR(item) \
43 ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ 43 ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
44 "offset:%u;\tsize:0;\n", \ 44 "offset:%u;\tsize:0;\n", \
45 (unsigned int)offsetof(typeof(field), item)); \ 45 (unsigned int)offsetof(typeof(field), item)); \
46 if (!ret) \ 46 if (!ret) \
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a63b247..64b54a59c55b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
423 423
424 trace_find_cmdline(entry->pid, comm); 424 trace_find_cmdline(entry->pid, comm);
425 425
426 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" 426 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
427 " %ld.%03ldms (+%ld.%03ldms): ", comm, 427 " %ld.%03ldms (+%ld.%03ldms): ", comm,
428 entry->pid, iter->cpu, entry->flags, 428 entry->pid, iter->cpu, entry->flags,
429 entry->preempt_count, iter->idx, 429 entry->preempt_count, iter->idx,
diff --git a/kernel/trace/trace_power.c b/kernel/trace/trace_power.c
index bae791ebcc51..118439709fb7 100644
--- a/kernel/trace/trace_power.c
+++ b/kernel/trace/trace_power.c
@@ -186,6 +186,12 @@ static enum print_line_t power_print_line(struct trace_iterator *iter)
186 return TRACE_TYPE_UNHANDLED; 186 return TRACE_TYPE_UNHANDLED;
187} 187}
188 188
189static void power_print_header(struct seq_file *s)
190{
191 seq_puts(s, "# TIMESTAMP STATE EVENT\n");
192 seq_puts(s, "# | | |\n");
193}
194
189static struct tracer power_tracer __read_mostly = 195static struct tracer power_tracer __read_mostly =
190{ 196{
191 .name = "power", 197 .name = "power",
@@ -194,6 +200,7 @@ static struct tracer power_tracer __read_mostly =
194 .stop = stop_power_trace, 200 .stop = stop_power_trace,
195 .reset = power_trace_reset, 201 .reset = power_trace_reset,
196 .print_line = power_print_line, 202 .print_line = power_print_line,
203 .print_header = power_print_header,
197}; 204};
198 205
199static int init_power_trace(void) 206static int init_power_trace(void)
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f200abd3..9117cea6f1ae 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
62 pc = preempt_count(); 62 pc = preempt_count();
63 tracing_record_cmdline(current); 63 tracing_record_cmdline(current);
64 64
65 if (sched_stopped)
66 return;
67
65 local_irq_save(flags); 68 local_irq_save(flags);
66 cpu = raw_smp_processor_id(); 69 cpu = raw_smp_processor_id();
67 data = ctx_trace->data[cpu]; 70 data = ctx_trace->data[cpu];
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b2ec84..5bc00e8f153e 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
154 if (unlikely(!tracer_enabled || next != wakeup_task)) 154 if (unlikely(!tracer_enabled || next != wakeup_task))
155 goto out_unlock; 155 goto out_unlock;
156 156
157 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 159
160 /* 160 /*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
257 data = wakeup_trace->data[wakeup_cpu]; 257 data = wakeup_trace->data[wakeup_cpu];
258 data->preempt_timestamp = ftrace_now(cpu); 258 data->preempt_timestamp = ftrace_now(cpu);
259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); 259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
260
261 /*
262 * We must be careful in using CALLER_ADDR2. But since wake_up
263 * is not called by an assembly function (where as schedule is)
264 * it should be safe to use it here.
265 */
260 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 266 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
261 267
262out_locked: 268out_locked:
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a2a3af29c943..5e579645ac86 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,5 @@
1#include <trace/syscall.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/ftrace.h>
3#include <asm/syscall.h> 3#include <asm/syscall.h>
4 4
5#include "trace_output.h" 5#include "trace_output.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b6b966ce1451..f71fb2a08950 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -966,20 +966,20 @@ undo:
966} 966}
967 967
968#ifdef CONFIG_SMP 968#ifdef CONFIG_SMP
969static struct workqueue_struct *work_on_cpu_wq __read_mostly;
970 969
971struct work_for_cpu { 970struct work_for_cpu {
972 struct work_struct work; 971 struct completion completion;
973 long (*fn)(void *); 972 long (*fn)(void *);
974 void *arg; 973 void *arg;
975 long ret; 974 long ret;
976}; 975};
977 976
978static void do_work_for_cpu(struct work_struct *w) 977static int do_work_for_cpu(void *_wfc)
979{ 978{
980 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); 979 struct work_for_cpu *wfc = _wfc;
981
982 wfc->ret = wfc->fn(wfc->arg); 980 wfc->ret = wfc->fn(wfc->arg);
981 complete(&wfc->completion);
982 return 0;
983} 983}
984 984
985/** 985/**
@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w)
990 * 990 *
991 * This will return the value @fn returns. 991 * This will return the value @fn returns.
992 * It is up to the caller to ensure that the cpu doesn't go offline. 992 * It is up to the caller to ensure that the cpu doesn't go offline.
993 * The caller must not hold any locks which would prevent @fn from completing.
993 */ 994 */
994long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 995long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
995{ 996{
996 struct work_for_cpu wfc; 997 struct task_struct *sub_thread;
997 998 struct work_for_cpu wfc = {
998 INIT_WORK(&wfc.work, do_work_for_cpu); 999 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
999 wfc.fn = fn; 1000 .fn = fn,
1000 wfc.arg = arg; 1001 .arg = arg,
1001 queue_work_on(cpu, work_on_cpu_wq, &wfc.work); 1002 };
1002 flush_work(&wfc.work); 1003
1003 1004 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
1005 if (IS_ERR(sub_thread))
1006 return PTR_ERR(sub_thread);
1007 kthread_bind(sub_thread, cpu);
1008 wake_up_process(sub_thread);
1009 wait_for_completion(&wfc.completion);
1004 return wfc.ret; 1010 return wfc.ret;
1005} 1011}
1006EXPORT_SYMBOL_GPL(work_on_cpu); 1012EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -1016,8 +1022,4 @@ void __init init_workqueues(void)
1016 hotcpu_notifier(workqueue_cpu_callback, 0); 1022 hotcpu_notifier(workqueue_cpu_callback, 0);
1017 keventd_wq = create_workqueue("events"); 1023 keventd_wq = create_workqueue("events");
1018 BUG_ON(!keventd_wq); 1024 BUG_ON(!keventd_wq);
1019#ifdef CONFIG_SMP
1020 work_on_cpu_wq = create_workqueue("work_on_cpu");
1021 BUG_ON(!work_on_cpu_wq);
1022#endif
1023} 1025}