author    Ingo Molnar <mingo@elte.hu>  2009-04-10 06:46:28 -0400
committer Ingo Molnar <mingo@elte.hu>  2009-04-10 06:46:51 -0400
commit    1cad1252ed279ea59f3f8d3d3a5817eeb2f7a4d3 (patch)
tree      ec5af7a70f58ad27ad21fc27815ca164ccf92c36 /kernel
parent    dcef788eb9659b61a2110284fcce3ca6e63480d2 (diff)
parent    93cfb3c9fd83d877a8f1ffad9ff862b617b32828 (diff)
Merge branch 'tracing/urgent' into tracing/core
Merge reason: pick up both v2.6.30-rc1 [which includes tracing/urgent fixes]
and the current lineup of tracing/urgent fixes as well.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                |    1
-rw-r--r--  kernel/exit.c                  |    2
-rw-r--r--  kernel/fork.c                  |   21
-rw-r--r--  kernel/futex.c                 |    7
-rw-r--r--  kernel/hung_task.c             |  217
-rw-r--r--  kernel/irq/devres.c            |   16
-rw-r--r--  kernel/irq/handle.c            |   50
-rw-r--r--  kernel/irq/manage.c            |  189
-rw-r--r--  kernel/irq/numa_migrate.c      |    1
-rw-r--r--  kernel/kprobes.c               |  281
-rw-r--r--  kernel/kthread.c               |   26
-rw-r--r--  kernel/module.c                |    3
-rw-r--r--  kernel/posix-cpu-timers.c      |    9
-rw-r--r--  kernel/ptrace.c                |   16
-rw-r--r--  kernel/sched.c                 |  160
-rw-r--r--  kernel/sched_cpupri.c          |    5
-rw-r--r--  kernel/sched_rt.c              |   15
-rw-r--r--  kernel/softlockup.c            |  100
-rw-r--r--  kernel/sysctl.c                |   38
-rw-r--r--  kernel/timer.c                 |    7
-rw-r--r--  kernel/trace/blktrace.c        |   10
-rw-r--r--  kernel/trace/trace.c           |   36
-rw-r--r--  kernel/trace/trace_syscalls.c  |    2
-rw-r--r--  kernel/workqueue.c             |   36
24 files changed, 939 insertions(+), 309 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index bab1dffe37e9..42423665660a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
 obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/exit.c b/kernel/exit.c
index 32cbf2607cb0..abf9cf3b95c6 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code)
 		schedule();
 	}
 
+	exit_irq_thread();
+
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 660c2b8765bc..b9e2edd00726 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+#ifdef CONFIG_DETECT_HUNG_TASK
+	tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
+#endif
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 	sig->cputime_expires.virt_exp = cputime_zero;
 	sig->cputime_expires.sched_exp = 0;
 
+	if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
+		sig->cputime_expires.prof_exp =
+			secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur);
+		sig->cputimer.running = 1;
+	}
+
 	/* The timer lists. */
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
@@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 		atomic_inc(&current->signal->live);
 		return 0;
 	}
-	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
-
-	if (sig)
-		posix_cpu_timers_init_group(sig);
 
+	sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL);
 	tsk->signal = sig;
 	if (!sig)
 		return -ENOMEM;
@@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 	task_unlock(current->group_leader);
 
+	posix_cpu_timers_init_group(sig);
+
 	acct_init_pacct(&sig->pacct);
 
 	tty_audit_fork(sig);
@@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-	p->last_switch_count = 0;
-	p->last_switch_timestamp = 0;
-#endif
-
 	task_io_accounting_init(&p->ioac);
 	acct_clear_integrals(p);
 
diff --git a/kernel/futex.c b/kernel/futex.c
index 6b50a024bca2..eef8cd26b5e5 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -883,7 +883,12 @@ retry_private:
 out_unlock:
 	double_unlock_hb(hb1, hb2);
 
-	/* drop_futex_key_refs() must be called outside the spinlocks. */
+	/*
+	 * drop_futex_key_refs() must be called outside the spinlocks. During
+	 * the requeue we moved futex_q's from the hash bucket at key1 to the
+	 * one at key2 and updated their key pointer. We no longer need to
+	 * hold the references to key1.
+	 */
 	while (--drop_count >= 0)
 		drop_futex_key_refs(&key1);
 
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
+/*
+ * Detect Hung Task
+ *
+ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
+ *
+ */
+
+#include <linux/mm.h>
+#include <linux/cpu.h>
+#include <linux/nmi.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/lockdep.h>
+#include <linux/module.h>
+#include <linux/sysctl.h>
+
+/*
+ * The number of tasks checked:
+ */
+unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
+
+/*
+ * Limit number of tasks checked in a batch.
+ *
+ * This value controls the preemptibility of khungtaskd since preemption
+ * is disabled during the critical section. It also controls the size of
+ * the RCU grace period. So it needs to be upper-bound.
+ */
+#define HUNG_TASK_BATCHING 1024
+
+/*
+ * Zero means infinite timeout - no checking done:
+ */
+unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
+
+unsigned long __read_mostly sysctl_hung_task_warnings = 10;
+
+static int __read_mostly did_panic;
+
+static struct task_struct *watchdog_task;
+
+/*
+ * Should we panic (and reboot, if panic_timeout= is set) when a
+ * hung task is detected:
+ */
+unsigned int __read_mostly sysctl_hung_task_panic =
+				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
+
+static int __init hung_task_panic_setup(char *str)
+{
+	sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
+
+	return 1;
+}
+__setup("hung_task_panic=", hung_task_panic_setup);
+
+static int
+hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	did_panic = 1;
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block panic_block = {
+	.notifier_call = hung_task_panic,
+};
+
+static void check_hung_task(struct task_struct *t, unsigned long timeout)
+{
+	unsigned long switch_count = t->nvcsw + t->nivcsw;
+
+	/*
+	 * Ensure the task is not frozen.
+	 * Also, when a freshly created task is scheduled once, changes
+	 * its state to TASK_UNINTERRUPTIBLE without having ever been
+	 * switched out once, it musn't be checked.
+	 */
+	if (unlikely(t->flags & PF_FROZEN || !switch_count))
+		return;
+
+	if (switch_count != t->last_switch_count) {
+		t->last_switch_count = switch_count;
+		return;
+	}
+	if (!sysctl_hung_task_warnings)
+		return;
+	sysctl_hung_task_warnings--;
+
+	/*
+	 * Ok, the task did not get scheduled for more than 2 minutes,
+	 * complain:
+	 */
+	printk(KERN_ERR "INFO: task %s:%d blocked for more than "
+			"%ld seconds.\n", t->comm, t->pid, timeout);
+	printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
+			" disables this message.\n");
+	sched_show_task(t);
+	__debug_show_held_locks(t);
+
+	touch_nmi_watchdog();
+
+	if (sysctl_hung_task_panic)
+		panic("hung_task: blocked tasks");
+}
+
+/*
+ * To avoid extending the RCU grace period for an unbounded amount of time,
+ * periodically exit the critical section and enter a new one.
+ *
+ * For preemptible RCU it is sufficient to call rcu_read_unlock in order
+ * exit the grace period. For classic RCU, a reschedule is required.
+ */
+static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
+{
+	get_task_struct(g);
+	get_task_struct(t);
+	rcu_read_unlock();
+	cond_resched();
+	rcu_read_lock();
+	put_task_struct(t);
+	put_task_struct(g);
+}
+
+/*
+ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
+ * a really long time (120 seconds). If that happens, print out
+ * a warning.
+ */
+static void check_hung_uninterruptible_tasks(unsigned long timeout)
+{
+	int max_count = sysctl_hung_task_check_count;
+	int batch_count = HUNG_TASK_BATCHING;
+	struct task_struct *g, *t;
+
+	/*
+	 * If the system crashed already then all bets are off,
+	 * do not report extra hung tasks:
+	 */
+	if (test_taint(TAINT_DIE) || did_panic)
+		return;
+
+	rcu_read_lock();
+	do_each_thread(g, t) {
+		if (!--max_count)
+			goto unlock;
+		if (!--batch_count) {
+			batch_count = HUNG_TASK_BATCHING;
+			rcu_lock_break(g, t);
+			/* Exit if t or g was unhashed during refresh. */
+			if (t->state == TASK_DEAD || g->state == TASK_DEAD)
+				goto unlock;
+		}
+		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
+		if (t->state == TASK_UNINTERRUPTIBLE)
+			check_hung_task(t, timeout);
+	} while_each_thread(g, t);
+ unlock:
+	rcu_read_unlock();
+}
+
+static unsigned long timeout_jiffies(unsigned long timeout)
+{
+	/* timeout of 0 will disable the watchdog */
+	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
+}
+
+/*
+ * Process updating of timeout sysctl
+ */
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+				  struct file *filp, void __user *buffer,
+				  size_t *lenp, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
+
+	if (ret || !write)
+		goto out;
+
+	wake_up_process(watchdog_task);
+
+ out:
+	return ret;
+}
+
+/*
+ * kthread which checks for tasks stuck in D state
+ */
+static int watchdog(void *dummy)
+{
+	set_user_nice(current, 0);
+
+	for ( ; ; ) {
+		unsigned long timeout = sysctl_hung_task_timeout_secs;
+
+		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
+			timeout = sysctl_hung_task_timeout_secs;
+
+		check_hung_uninterruptible_tasks(timeout);
+	}
+
+	return 0;
+}
+
+static int __init hung_task_init(void)
+{
+	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
+	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
+
+	return 0;
+}
+
+module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
 }
 
 /**
- * devm_request_irq - allocate an interrupt line for a managed device
+ * devm_request_threaded_irq - allocate an interrupt line for a managed device
  * @dev: device to request interrupt for
  * @irq: Interrupt line to allocate
  * @handler: Function to be called when the IRQ occurs
+ * @thread_fn: function to be called in a threaded interrupt context. NULL
+ *             for devices which handle everything in @handler
  * @irqflags: Interrupt type flags
  * @devname: An ascii name for the claiming device
  * @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
  * If an IRQ allocated with this function needs to be freed
  * separately, dev_free_irq() must be used.
  */
-int devm_request_irq(struct device *dev, unsigned int irq,
-		     irq_handler_t handler, unsigned long irqflags,
-		     const char *devname, void *dev_id)
+int devm_request_threaded_irq(struct device *dev, unsigned int irq,
+			      irq_handler_t handler, irq_handler_t thread_fn,
+			      unsigned long irqflags, const char *devname,
+			      void *dev_id)
 {
 	struct irq_devres *dr;
 	int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 	if (!dr)
 		return -ENOMEM;
 
-	rc = request_irq(irq, handler, irqflags, devname, dev_id);
+	rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
+				  dev_id);
 	if (rc) {
 		devres_free(dr);
 		return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
 
 	return 0;
 }
-EXPORT_SYMBOL(devm_request_irq);
+EXPORT_SYMBOL(devm_request_threaded_irq);
 
 /**
  * devm_free_irq - free an interrupt
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..d82142be8dd2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
 	return IRQ_NONE;
 }
 
+static void warn_no_thread(unsigned int irq, struct irqaction *action)
+{
+	if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
+		return;
+
+	printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
+	       "but no thread function available.", irq, action->name);
+}
+
 DEFINE_TRACE(irq_handler_entry);
 DEFINE_TRACE(irq_handler_exit);
 
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);
 		trace_irq_handler_exit(irq, action, ret);
-		if (ret == IRQ_HANDLED)
+
+		switch (ret) {
+		case IRQ_WAKE_THREAD:
+			/*
+			 * Set result to handled so the spurious check
+			 * does not trigger.
+			 */
+			ret = IRQ_HANDLED;
+
+			/*
+			 * Catch drivers which return WAKE_THREAD but
+			 * did not set up a thread function
+			 */
+			if (unlikely(!action->thread_fn)) {
+				warn_no_thread(irq, action);
+				break;
+			}
+
+			/*
+			 * Wake up the handler thread for this
+			 * action. In case the thread crashed and was
+			 * killed we just pretend that we handled the
+			 * interrupt. The hardirq handler above has
+			 * disabled the device interrupt, so no irq
+			 * storm is lurking.
+			 */
+			if (likely(!test_bit(IRQTF_DIED,
+					     &action->thread_flags))) {
+				set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
+				wake_up_process(action->thread);
+			}
+
+			/* Fall through to add to randomness */
+		case IRQ_HANDLED:
 			status |= action->flags;
+			break;
+
+		default:
+			break;
+		}
+
 		retval |= ret;
 		action = action->next;
 	} while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..7e2e7dd4cd2f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
  */
 
 #include <linux/irq.h>
+#include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include "internals.h"
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
-cpumask_var_t irq_default_affinity;
-
 /**
  * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
  * @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
 
 		/* Oops, that failed? */
 	} while (status & IRQ_INPROGRESS);
+
+	/*
+	 * We made sure that no hardirq handler is running. Now verify
+	 * that no threaded handlers are active.
+	 */
+	wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
 }
 EXPORT_SYMBOL(synchronize_irq);
 
+#ifdef CONFIG_SMP
+cpumask_var_t irq_default_affinity;
+
 /**
  * irq_can_set_affinity - Check if the affinity of a given irq can be set
  * @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
 	return 1;
 }
 
+static void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
+{
+	struct irqaction *action = desc->action;
+
+	while (action) {
+		if (action->thread)
+			set_cpus_allowed_ptr(action->thread, cpumask);
+		action = action->next;
+	}
+}
+
 /**
  * irq_set_affinity - Set the irq affinity of a given irq
  * @irq: Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	cpumask_copy(desc->affinity, cpumask);
 	desc->chip->set_affinity(irq, cpumask);
 #endif
+	irq_set_thread_affinity(desc, cpumask);
 	desc->status |= IRQ_AFFINITY_SET;
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
 
 	spin_lock_irqsave(&desc->lock, flags);
 	ret = setup_affinity(irq, desc);
+	if (!ret)
+		irq_set_thread_affinity(desc, desc->affinity);
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
 	return ret;
 }
 
+static int irq_wait_for_interrupt(struct irqaction *action)
+{
+	while (!kthread_should_stop()) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (test_and_clear_bit(IRQTF_RUNTHREAD,
+				       &action->thread_flags)) {
+			__set_current_state(TASK_RUNNING);
+			return 0;
+		}
+		schedule();
+	}
+	return -1;
+}
+
+/*
+ * Interrupt handler thread
+ */
+static int irq_thread(void *data)
+{
+	struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
+	struct irqaction *action = data;
+	struct irq_desc *desc = irq_to_desc(action->irq);
+	int wake;
+
+	sched_setscheduler(current, SCHED_FIFO, &param);
+	current->irqaction = action;
+
+	while (!irq_wait_for_interrupt(action)) {
+
+		atomic_inc(&desc->threads_active);
+
+		spin_lock_irq(&desc->lock);
+		if (unlikely(desc->status & IRQ_DISABLED)) {
+			/*
+			 * CHECKME: We might need a dedicated
+			 * IRQ_THREAD_PENDING flag here, which
+			 * retriggers the thread in check_irq_resend()
+			 * but AFAICT IRQ_PENDING should be fine as it
+			 * retriggers the interrupt itself --- tglx
+			 */
+			desc->status |= IRQ_PENDING;
+			spin_unlock_irq(&desc->lock);
+		} else {
+			spin_unlock_irq(&desc->lock);
+
+			action->thread_fn(action->irq, action->dev_id);
+		}
+
+		wake = atomic_dec_and_test(&desc->threads_active);
+
+		if (wake && waitqueue_active(&desc->wait_for_threads))
+			wake_up(&desc->wait_for_threads);
+	}
+
+	/*
+	 * Clear irqaction. Otherwise exit_irq_thread() would make
+	 * fuzz about an active irq thread going into nirvana.
+	 */
+	current->irqaction = NULL;
+	return 0;
+}
+
+/*
+ * Called from do_exit()
+ */
+void exit_irq_thread(void)
+{
+	struct task_struct *tsk = current;
+
+	if (!tsk->irqaction)
+		return;
+
+	printk(KERN_ERR
+	       "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
+
+	/*
+	 * Set the THREAD DIED flag to prevent further wakeups of the
+	 * soon to be gone threaded handler.
+	 */
+	set_bit(IRQTF_DIED, &tsk->irqaction->flags);
+}
+
 /*
  * Internal function to register an irqaction - typically used to
  * allocate special interrupts that are part of the architecture.
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	}
 
 	/*
+	 * Threaded handler ?
+	 */
+	if (new->thread_fn) {
+		struct task_struct *t;
+
+		t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
+				   new->name);
+		if (IS_ERR(t))
+			return PTR_ERR(t);
+		/*
+		 * We keep the reference to the task struct even if
+		 * the thread dies to avoid that the interrupt code
+		 * references an already freed task_struct.
+		 */
+		get_task_struct(t);
+		new->thread = t;
+		wake_up_process(t);
+	}
+
+	/*
 	 * The following block of code has to be executed atomically
 	 */
 	spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 	if (!shared) {
 		irq_chip_set_defaults(desc->chip);
 
+		init_waitqueue_head(&desc->wait_for_threads);
+
 		/* Setup the type (level, edge polarity) if configured: */
 		if (new->flags & IRQF_TRIGGER_MASK) {
 			ret = __irq_set_trigger(desc, irq,
 					new->flags & IRQF_TRIGGER_MASK);
 
-			if (ret) {
-				spin_unlock_irqrestore(&desc->lock, flags);
-				return ret;
-			}
+			if (ret)
+				goto out_thread;
 		} else
 			compat_irq_chip_set_default_handler(desc);
 #if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
 		dump_stack();
 	}
 #endif
+	ret = -EBUSY;
+
+out_thread:
 	spin_unlock_irqrestore(&desc->lock, flags);
-	return -EBUSY;
+	if (new->thread) {
+		struct task_struct *t = new->thread;
+
+		new->thread = NULL;
+		if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
+			kthread_stop(t);
+		put_task_struct(t);
+	}
+	return ret;
 }
 
 /**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
 	struct irqaction *action, **action_ptr;
+	struct task_struct *irqthread;
 	unsigned long flags;
 
 	WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 		else
 			desc->chip->disable(irq);
 	}
+
+	irqthread = action->thread;
+	action->thread = NULL;
+
 	spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 	/* Make sure it's not being used on another CPU: */
 	synchronize_irq(irq);
 
+	if (irqthread) {
+		if (!test_bit(IRQTF_DIED, &action->thread_flags))
+			kthread_stop(irqthread);
+		put_task_struct(irqthread);
+	}
+
 #ifdef CONFIG_DEBUG_SHIRQ
 	/*
 	 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
 EXPORT_SYMBOL(free_irq);
 
 /**
- *	request_irq - allocate an interrupt line
+ *	request_threaded_irq - allocate an interrupt line
  *	@irq: Interrupt line to allocate
- *	@handler: Function to be called when the IRQ occurs
+ *	@handler: Function to be called when the IRQ occurs.
+ *		  Primary handler for threaded interrupts
+ *	@thread_fn: Function called from the irq handler thread
+ *		    If NULL, no irq thread is created
  *	@irqflags: Interrupt type flags
  *	@devname: An ascii name for the claiming device
  *	@dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
  *	raises, you must take care both to initialise your hardware
  *	and to set up the interrupt handler in the right order.
  *
+ *	If you want to set up a threaded irq handler for your device
+ *	then you need to supply @handler and @thread_fn. @handler ist
+ *	still called in hard interrupt context and has to check
+ *	whether the interrupt originates from the device. If yes it
+ *	needs to disable the interrupt on the device and return
+ *	IRQ_THREAD_WAKE which will wake up the handler thread and run
+ *	@thread_fn. This split handler design is necessary to support
+ *	shared interrupts.
+ *
  *	Dev_id must be globally unique. Normally the address of the
  *	device data structure is used as the cookie. Since the handler
  *	receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
  *	IRQF_TRIGGER_*		Specify active edge(s) or level
  *
  */
-int request_irq(unsigned int irq, irq_handler_t handler,
-		unsigned long irqflags, const char *devname, void *dev_id)
+int request_threaded_irq(unsigned int irq, irq_handler_t handler,
+			 irq_handler_t thread_fn, unsigned long irqflags,
+			 const char *devname, void *dev_id)
 {
 	struct irqaction *action;
 	struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 		return -ENOMEM;
 
 	action->handler = handler;
+	action->thread_fn = thread_fn;
 	action->flags = irqflags;
 	action->name = devname;
 	action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
 #endif
 	return retval;
 }
-EXPORT_SYMBOL(request_irq);
+EXPORT_SYMBOL(request_threaded_irq);
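
For illustration (not part of this patch), a minimal sketch of how a driver might use the request_threaded_irq() API added above, following the kernel-doc in this hunk: the primary handler runs in hard interrupt context, checks whether the interrupt came from its device, silences it there, and returns IRQ_WAKE_THREAD so handle_IRQ_event() wakes the "irq/<nr>-<name>" thread created in __setup_irq(). The struct my_dev type and the my_dev_* helpers are hypothetical names used only for this sketch.

	#include <linux/interrupt.h>

	/* Hypothetical device and helpers, for illustration only. */
	struct my_dev;
	static bool my_dev_irq_pending(struct my_dev *dev);
	static void my_dev_mask_irq(struct my_dev *dev);
	static void my_dev_handle_work(struct my_dev *dev);

	/* Primary handler: hard interrupt context, must stay short. */
	static irqreturn_t my_quick_check(int irq, void *dev_id)
	{
		struct my_dev *dev = dev_id;

		if (!my_dev_irq_pending(dev))
			return IRQ_NONE;	/* not ours (shared line) */

		my_dev_mask_irq(dev);		/* silence the device ... */
		return IRQ_WAKE_THREAD;		/* ... and defer to the thread */
	}

	/* Threaded handler: runs in the per-action handler thread. */
	static irqreturn_t my_slow_work(int irq, void *dev_id)
	{
		my_dev_handle_work(dev_id);
		return IRQ_HANDLED;
	}

	static int my_probe(struct my_dev *dev, unsigned int irq)
	{
		return request_threaded_irq(irq, my_quick_check, my_slow_work,
					    IRQF_SHARED, "my_dev", dev);
	}

Because the device interrupt is masked before IRQ_WAKE_THREAD is returned, no interrupt storm can occur while the thread runs, which is what makes this split workable on shared lines.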
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 243d6121e50e..44bbdcbaf8d2 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
 static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 {
 	free_kstat_irqs(old_desc, desc);
+	free_desc_masks(old_desc, desc);
 	arch_free_chip_data(old_desc, desc);
 }
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5016bfb682b9..a5e74ddee0e2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
 static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
 
 /* NOTE: change this value only with kprobe_mutex held */
-static bool kprobe_enabled;
+static bool kprobes_all_disarmed;
 
 static DEFINE_MUTEX(kprobe_mutex);	/* Protects kprobe_table */
 static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->pre_handler && !kprobe_gone(kp)) {
+		if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
 			set_kprobe_instance(kp);
 			if (kp->pre_handler(kp, regs))
 				return 1;
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
 	struct kprobe *kp;
 
 	list_for_each_entry_rcu(kp, &p->list, list) {
-		if (kp->post_handler && !kprobe_gone(kp)) {
+		if (kp->post_handler && likely(!kprobe_disabled(kp))) {
 			set_kprobe_instance(kp);
 			kp->post_handler(kp, regs, flags);
 			reset_kprobe_instance();
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
 }
 
 /*
-* Add the new probe to old_p->list. Fail if this is the
+* Add the new probe to ap->list. Fail if this is the
 * second jprobe at the address - two jprobes can't coexist
 */
-static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
+static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
 {
+	BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
 	if (p->break_handler) {
-		if (old_p->break_handler)
+		if (ap->break_handler)
 			return -EEXIST;
-		list_add_tail_rcu(&p->list, &old_p->list);
-		old_p->break_handler = aggr_break_handler;
+		list_add_tail_rcu(&p->list, &ap->list);
+		ap->break_handler = aggr_break_handler;
 	} else
-		list_add_rcu(&p->list, &old_p->list);
-	if (p->post_handler && !old_p->post_handler)
-		old_p->post_handler = aggr_post_handler;
+		list_add_rcu(&p->list, &ap->list);
+	if (p->post_handler && !ap->post_handler)
+		ap->post_handler = aggr_post_handler;
+
+	if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
+		ap->flags &= ~KPROBE_FLAG_DISABLED;
+		if (!kprobes_all_disarmed)
+			/* Arm the breakpoint again. */
+			arch_arm_kprobe(ap);
+	}
 	return 0;
 }
 
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 	copy_kprobe(p, ap);
 	flush_insn_slot(ap);
 	ap->addr = p->addr;
+	ap->flags = p->flags;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
 	/* We don't care the kprobe which has gone. */
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
 					  struct kprobe *p)
 {
 	int ret = 0;
-	struct kprobe *ap;
+	struct kprobe *ap = old_p;
 
-	if (kprobe_gone(old_p)) {
+	if (old_p->pre_handler != aggr_pre_handler) {
+		/* If old_p is not an aggr_probe, create new aggr_kprobe. */
+		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
+		if (!ap)
+			return -ENOMEM;
+		add_aggr_kprobe(ap, old_p);
+	}
+
+	if (kprobe_gone(ap)) {
 		/*
 		 * Attempting to insert new probe at the same location that
 		 * had a probe in the module vaddr area which already
 		 * freed. So, the instruction slot has already been
 		 * released. We need a new slot for the new probe.
 		 */
-		ret = arch_prepare_kprobe(old_p);
+		ret = arch_prepare_kprobe(ap);
 		if (ret)
+			/*
+			 * Even if fail to allocate new slot, don't need to
+			 * free aggr_probe. It will be used next time, or
+			 * freed by unregister_kprobe.
+			 */
 			return ret;
-	}
-	if (old_p->pre_handler == aggr_pre_handler) {
-		copy_kprobe(old_p, p);
-		ret = add_new_kprobe(old_p, p);
-		ap = old_p;
-	} else {
-		ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
-		if (!ap) {
-			if (kprobe_gone(old_p))
-				arch_remove_kprobe(old_p);
-			return -ENOMEM;
-		}
-		add_aggr_kprobe(ap, old_p);
-		copy_kprobe(ap, p);
-		ret = add_new_kprobe(ap, p);
-	}
-	if (kprobe_gone(old_p)) {
+
 		/*
-		 * If the old_p has gone, its breakpoint has been disarmed.
-		 * We have to arm it again after preparing real kprobes.
+		 * Clear gone flag to prevent allocating new slot again, and
+		 * set disabled flag because it is not armed yet.
 		 */
-		ap->flags &= ~KPROBE_FLAG_GONE;
-		if (kprobe_enabled)
-			arch_arm_kprobe(ap);
+		ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
+			    | KPROBE_FLAG_DISABLED;
 	}
-	return ret;
+
+	copy_kprobe(ap, p);
+	return add_new_kprobe(ap, p);
+}
+
+/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
+static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
+{
+	struct kprobe *kp;
+
+	list_for_each_entry_rcu(kp, &p->list, list) {
+		if (!kprobe_disabled(kp))
+			/*
+			 * There is an active probe on the list.
+			 * We can't disable aggr_kprobe.
+			 */
+			return 0;
+	}
+	p->flags |= KPROBE_FLAG_DISABLED;
+	return 1;
 }
 
 static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
 		return -EINVAL;
 	}
 
-	p->flags = 0;
+	/* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
+	p->flags &= KPROBE_FLAG_DISABLED;
+
 	/*
 	 * Check if are we probing a module.
 	 */
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
 	hlist_add_head_rcu(&p->hlist,
 		       &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
 
-	if (kprobe_enabled)
+	if (!kprobes_all_disarmed && !kprobe_disabled(p))
 		arch_arm_kprobe(p);
 
 out_unlock_text:
@@ -722,26 +748,39 @@ out:
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kprobe);
 
-/*
- * Unregister a kprobe without a scheduler synchronization.
- */
-static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+/* Check passed kprobe is valid and return kprobe in kprobe_table. */
+static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
 {
 	struct kprobe *old_p, *list_p;
 
 	old_p = get_kprobe(p->addr);
 	if (unlikely(!old_p))
-		return -EINVAL;
+		return NULL;
 
 	if (p != old_p) {
 		list_for_each_entry_rcu(list_p, &old_p->list, list)
 			if (list_p == p)
 			/* kprobe p is a valid probe */
-				goto valid_p;
-		return -EINVAL;
+				goto valid;
+		return NULL;
 	}
-valid_p:
+valid:
+	return old_p;
+}
+
+/*
+ * Unregister a kprobe without a scheduler synchronization.
+ */
+static int __kprobes __unregister_kprobe_top(struct kprobe *p)
+{
+	struct kprobe *old_p, *list_p;
+
+	old_p = __get_valid_kprobe(p);
+	if (old_p == NULL)
+		return -EINVAL;
+
 	if (old_p == p ||
 	    (old_p->pre_handler == aggr_pre_handler &&
 	     list_is_singular(&old_p->list))) {
@@ -750,7 +789,7 @@ valid_p:
 		 * enabled and not gone - otherwise, the breakpoint would
 		 * already have been removed. We save on flushing icache.
 		 */
-		if (kprobe_enabled && !kprobe_gone(old_p)) {
+		if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
 			mutex_lock(&text_mutex);
 			arch_disarm_kprobe(p);
 			mutex_unlock(&text_mutex);
@@ -768,6 +807,11 @@ valid_p:
 		}
 noclean:
 		list_del_rcu(&p->list);
+		if (!kprobe_disabled(old_p)) {
+			try_to_disable_aggr_kprobe(old_p);
+			if (!kprobes_all_disarmed && kprobe_disabled(old_p))
+				arch_disarm_kprobe(old_p);
+		}
 	}
 	return 0;
 }
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kprobes);
 
 void __kprobes unregister_kprobe(struct kprobe *p)
 {
 	unregister_kprobes(&p, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_kprobe);
 
 void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 {
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
 		if (kps[i]->addr)
 			__unregister_kprobe_bottom(kps[i]);
 }
+EXPORT_SYMBOL_GPL(unregister_kprobes);
 
 static struct notifier_block kprobe_exceptions_nb = {
 	.notifier_call = kprobe_exceptions_notify,
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_jprobes);
 
 int __kprobes register_jprobe(struct jprobe *jp)
 {
 	return register_jprobes(&jp, 1);
 }
+EXPORT_SYMBOL_GPL(register_jprobe);
 
 void __kprobes unregister_jprobe(struct jprobe *jp)
 {
 	unregister_jprobes(&jp, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_jprobe);
 
 void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 {
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
 			__unregister_kprobe_bottom(&jps[i]->kp);
 	}
 }
+EXPORT_SYMBOL_GPL(unregister_jprobes);
 
 #ifdef CONFIG_KRETPROBES
 /*
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
 		free_rp_inst(rp);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kretprobe);
 
 int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(register_kretprobes);
 
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
 	unregister_kretprobes(&rp, 1);
 }
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 {
@@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 		}
 	}
 }
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
 #else /* CONFIG_KRETPROBES */
 int __kprobes register_kretprobe(struct kretprobe *rp)
 {
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(register_kretprobe);
 
 int __kprobes register_kretprobes(struct kretprobe **rps, int num)
 {
 	return -ENOSYS;
 }
+EXPORT_SYMBOL_GPL(register_kretprobes);
+
 void __kprobes unregister_kretprobe(struct kretprobe *rp)
 {
 }
+EXPORT_SYMBOL_GPL(unregister_kretprobe);
 
 void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
 {
 }
+EXPORT_SYMBOL_GPL(unregister_kretprobes);
 
 static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 					   struct pt_regs *regs)
@@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
 static void __kprobes kill_kprobe(struct kprobe *p)
 {
 	struct kprobe *kp;
+
 	p->flags |= KPROBE_FLAG_GONE;
 	if (p->pre_handler == aggr_pre_handler) {
 		/*
@@ -1173,8 +1234,8 @@ static int __init init_kprobes(void)
 		}
 	}
 
-	/* By default, kprobes are enabled */
-	kprobe_enabled = true;
+	/* By default, kprobes are armed */
+	kprobes_all_disarmed = false;
 
 	err = arch_init_kprobes();
 	if (!err)
@@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
 	else
 		kprobe_type = "k";
 	if (sym)
-		seq_printf(pi, "%p  %s  %s+0x%x  %s %s\n", p->addr, kprobe_type,
-			sym, offset, (modname ? modname : " "),
-			(kprobe_gone(p) ? "[GONE]" : ""));
+		seq_printf(pi, "%p  %s  %s+0x%x  %s %s%s\n",
+			p->addr, kprobe_type, sym, offset,
+			(modname ? modname : " "),
+			(kprobe_gone(p) ? "[GONE]" : ""),
+			((kprobe_disabled(p) && !kprobe_gone(p)) ?
+			 "[DISABLED]" : ""));
 	else
-		seq_printf(pi, "%p  %s  %p %s\n", p->addr, kprobe_type, p->addr,
-			(kprobe_gone(p) ? "[GONE]" : ""));
+		seq_printf(pi, "%p  %s  %p %s%s\n",
+			p->addr, kprobe_type, p->addr,
+			(kprobe_gone(p) ? "[GONE]" : ""),
+			((kprobe_disabled(p) && !kprobe_gone(p)) ?
+			 "[DISABLED]" : ""));
 }
 
 static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = {
 	.release        = seq_release,
 };
 
-static void __kprobes enable_all_kprobes(void)
+/* Disable one kprobe */
+int __kprobes disable_kprobe(struct kprobe *kp)
+{
+	int ret = 0;
+	struct kprobe *p;
+
+	mutex_lock(&kprobe_mutex);
+
+	/* Check whether specified probe is valid. */
+	p = __get_valid_kprobe(kp);
+	if (unlikely(p == NULL)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* If the probe is already disabled (or gone), just return */
+	if (kprobe_disabled(kp))
+		goto out;
+
+	kp->flags |= KPROBE_FLAG_DISABLED;
+	if (p != kp)
+		/* When kp != p, p is always enabled. */
+		try_to_disable_aggr_kprobe(p);
+
+	if (!kprobes_all_disarmed && kprobe_disabled(p))
+		arch_disarm_kprobe(p);
+out:
+	mutex_unlock(&kprobe_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(disable_kprobe);
+
+/* Enable one kprobe */
+int __kprobes enable_kprobe(struct kprobe *kp)
+{
+	int ret = 0;
+	struct kprobe *p;
+
+	mutex_lock(&kprobe_mutex);
+
+	/* Check whether specified probe is valid. */
+	p = __get_valid_kprobe(kp);
+	if (unlikely(p == NULL)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (kprobe_gone(kp)) {
+		/* This kprobe has gone, we couldn't enable it. */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!kprobes_all_disarmed && kprobe_disabled(p))
+		arch_arm_kprobe(p);
+
+	p->flags &= ~KPROBE_FLAG_DISABLED;
+	if (p != kp)
+		kp->flags &= ~KPROBE_FLAG_DISABLED;
+out:
+	mutex_unlock(&kprobe_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(enable_kprobe);
+
+static void __kprobes arm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void)
 
 	mutex_lock(&kprobe_mutex);
 
-	/* If kprobes are already enabled, just return */
-	if (kprobe_enabled)
+	/* If kprobes are armed, just return */
+	if (!kprobes_all_disarmed)
 		goto already_enabled;
 
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist)
-			if (!kprobe_gone(p))
+			if (!kprobe_disabled(p))
 				arch_arm_kprobe(p);
 	}
 	mutex_unlock(&text_mutex);
 
-	kprobe_enabled = true;
+	kprobes_all_disarmed = false;
 	printk(KERN_INFO "Kprobes globally enabled\n");
 
 already_enabled:
@@ -1302,7 +1434,7 @@ already_enabled:
 	return;
 }
 
-static void __kprobes disable_all_kprobes(void)
+static void __kprobes disarm_all_kprobes(void)
 {
 	struct hlist_head *head;
 	struct hlist_node *node;
@@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void)
 
 	mutex_lock(&kprobe_mutex);
 
-	/* If kprobes are already disabled, just return */
-	if (!kprobe_enabled)
+	/* If kprobes are already disarmed, just return */
+	if (kprobes_all_disarmed)
 		goto already_disabled;
 
-	kprobe_enabled = false;
+	kprobes_all_disarmed = true;
 	printk(KERN_INFO "Kprobes globally disabled\n");
 	mutex_lock(&text_mutex);
 	for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
 		head = &kprobe_table[i];
 		hlist_for_each_entry_rcu(p, node, head, hlist) {
-			if (!arch_trampoline_kprobe(p) && !kprobe_gone(p))
+			if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
 				arch_disarm_kprobe(p);
 		}
 	}
@@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file,
 {
 	char buf[3];
 
-	if (kprobe_enabled)
+	if (!kprobes_all_disarmed)
 		buf[0] = '1';
 	else
 		buf[0] = '0';
@@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file,
 	case 'y':
 	case 'Y':
 	case '1':
-		enable_all_kprobes();
+		arm_all_kprobes();
 		break;
 	case 'n':
 	case 'N':
 	case '0':
-		disable_all_kprobes();
+		disarm_all_kprobes();
 		break;
 	}
 
@@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init);
 
 module_init(init_kprobes);
 
-EXPORT_SYMBOL_GPL(register_kprobe);
-EXPORT_SYMBOL_GPL(unregister_kprobe);
-EXPORT_SYMBOL_GPL(register_kprobes);
-EXPORT_SYMBOL_GPL(unregister_kprobes);
-EXPORT_SYMBOL_GPL(register_jprobe);
-EXPORT_SYMBOL_GPL(unregister_jprobe);
-EXPORT_SYMBOL_GPL(register_jprobes);
-EXPORT_SYMBOL_GPL(unregister_jprobes);
+/* defined in arch/.../kernel/kprobes.c */
 EXPORT_SYMBOL_GPL(jprobe_return);
-EXPORT_SYMBOL_GPL(register_kretprobe);
-EXPORT_SYMBOL_GPL(unregister_kretprobe);
-EXPORT_SYMBOL_GPL(register_kretprobes);
-EXPORT_SYMBOL_GPL(unregister_kretprobes);
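
As an illustration of the per-kprobe disabling introduced above (not part of this patch), a minimal sketch of a module that registers a probe in the disabled state - register_kprobe() now keeps KPROBE_FLAG_DISABLED if the caller sets it - and then arms and disarms it with the newly exported enable_kprobe()/disable_kprobe(). The handler, the probed symbol "do_fork" and the my_* names are only examples.

	#include <linux/kprobes.h>

	/* Hypothetical pre-handler, for illustration only. */
	static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("hit %s\n", p->symbol_name);
		return 0;
	}

	static struct kprobe my_probe = {
		.symbol_name	= "do_fork",
		.pre_handler	= my_pre_handler,
		/* Only KPROBE_FLAG_DISABLED is honored at registration time. */
		.flags		= KPROBE_FLAG_DISABLED,
	};

	static int __init my_init(void)
	{
		int ret = register_kprobe(&my_probe);	/* inserted, not armed */

		if (ret)
			return ret;

		ret = enable_kprobe(&my_probe);		/* arm the breakpoint */
		if (ret)
			unregister_kprobe(&my_probe);
		return ret;
	}

	static void __exit my_exit(void)
	{
		disable_kprobe(&my_probe);	/* disarm but keep registered */
		unregister_kprobe(&my_probe);
	}

	module_init(my_init);
	module_exit(my_exit);

A disabled probe still shows up in /sys/kernel/debug/kprobes/list, now tagged [DISABLED] by report_probe() above.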
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 84bbadd4d021..4ebaf8519abf 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -76,6 +76,7 @@ static int kthread(void *_create)
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
+	create->result = current;
 	complete(&create->started);
 	schedule();
 
@@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create)
 
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
-	if (pid < 0) {
+	if (pid < 0)
 		create->result = ERR_PTR(pid);
-	} else {
-		struct sched_param param = { .sched_priority = 0 };
+	else
 		wait_for_completion(&create->started);
-		read_lock(&tasklist_lock);
-		create->result = find_task_by_pid_ns(pid, &init_pid_ns);
-		read_unlock(&tasklist_lock);
-		/*
-		 * root may have changed our (kthreadd's) priority or CPU mask.
-		 * The kernel thread should not inherit these properties.
-		 */
-		sched_setscheduler(create->result, SCHED_NORMAL, &param);
-		set_user_nice(create->result, KTHREAD_NICE_LEVEL);
-		set_cpus_allowed_ptr(create->result, cpu_all_mask);
-	}
 	complete(&create->done);
 }
 
@@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
 	wait_for_completion(&create.done);
 
 	if (!IS_ERR(create.result)) {
+		struct sched_param param = { .sched_priority = 0 };
 		va_list args;
+
 		va_start(args, namefmt);
 		vsnprintf(create.result->comm, sizeof(create.result->comm),
 			  namefmt, args);
 		va_end(args);
+		/*
+		 * root may have changed our (kthreadd's) priority or CPU mask.
+		 * The kernel thread should not inherit these properties.
+		 */
+		sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param);
+		set_user_nice(create.result, KTHREAD_NICE_LEVEL);
+		set_cpus_allowed_ptr(create.result, cpu_all_mask);
 	}
 	return create.result;
 }
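
For context (not part of this patch), the caller-side pattern that the reworked kthread_create() serves is the same one used elsewhere in this merge: khungtaskd is started with kthread_run() in hung_task_init(), and the irq handler threads loop on kthread_should_stop() and are torn down with kthread_stop(). A minimal sketch under assumed names (my_worker, my_task):

	#include <linux/kthread.h>
	#include <linux/err.h>
	#include <linux/delay.h>

	/* Hypothetical worker; the loop shape mirrors watchdog() and irq_thread(). */
	static int my_worker(void *data)
	{
		while (!kthread_should_stop()) {
			/* ... do one unit of work ... */
			msleep_interruptible(1000);
		}
		return 0;
	}

	static struct task_struct *my_task;

	static int my_start(void)
	{
		/* kthread_create() + wake_up_process(), as hung_task_init() does. */
		my_task = kthread_run(my_worker, NULL, "my_worker");
		return IS_ERR(my_task) ? PTR_ERR(my_task) : 0;
	}

	static void my_stop(void)
	{
		kthread_stop(my_task);	/* makes kthread_should_stop() return true */
	}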
diff --git a/kernel/module.c b/kernel/module.c
index c268a771595c..05f014efa32c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod,
 		if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
 			sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
 #endif
-		/* Don't keep __versions around; it's just for loading. */
-		if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
-			sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
 	}
 
 	modindex = find_sec(hdr, sechdrs, secstrings,
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8e5d9a68b022..c9dcf98b4463 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new)
18 18
19 cputime = secs_to_cputime(rlim_new); 19 cputime = secs_to_cputime(rlim_new);
20 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || 20 if (cputime_eq(current->signal->it_prof_expires, cputime_zero) ||
21 cputime_lt(current->signal->it_prof_expires, cputime)) { 21 cputime_gt(current->signal->it_prof_expires, cputime)) {
22 spin_lock_irq(&current->sighand->siglock); 22 spin_lock_irq(&current->sighand->siglock);
23 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); 23 set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL);
24 spin_unlock_irq(&current->sighand->siglock); 24 spin_unlock_irq(&current->sighand->siglock);
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
224 cpu->cpu = virt_ticks(p); 224 cpu->cpu = virt_ticks(p);
225 break; 225 break;
226 case CPUCLOCK_SCHED: 226 case CPUCLOCK_SCHED:
227 cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); 227 cpu->sched = task_sched_runtime(p);
228 break; 228 break;
229 } 229 }
230 return 0; 230 return 0;
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
305{ 305{
306 struct task_cputime cputime; 306 struct task_cputime cputime;
307 307
308 thread_group_cputime(p, &cputime);
309 switch (CPUCLOCK_WHICH(which_clock)) { 308 switch (CPUCLOCK_WHICH(which_clock)) {
310 default: 309 default:
311 return -EINVAL; 310 return -EINVAL;
312 case CPUCLOCK_PROF: 311 case CPUCLOCK_PROF:
312 thread_group_cputime(p, &cputime);
313 cpu->cpu = cputime_add(cputime.utime, cputime.stime); 313 cpu->cpu = cputime_add(cputime.utime, cputime.stime);
314 break; 314 break;
315 case CPUCLOCK_VIRT: 315 case CPUCLOCK_VIRT:
316 thread_group_cputime(p, &cputime);
316 cpu->cpu = cputime.utime; 317 cpu->cpu = cputime.utime;
317 break; 318 break;
318 case CPUCLOCK_SCHED: 319 case CPUCLOCK_SCHED:
319 cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); 320 cpu->sched = thread_group_sched_runtime(p);
320 break; 321 break;
321 } 322 }
322 return 0; 323 return 0;
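
Two things happen in the posix-cpu-timers.c hunks: update_rlimit_cpu() now rearms the profiling timer only when the new RLIMIT_CPU expiry is earlier than the one already programmed (cputime_gt() rather than cputime_lt()), and CPUCLOCK_SCHED sampling goes through task_sched_runtime()/thread_group_sched_runtime(), which take the runqueue lock once and fold in the running task's not-yet-accounted delta. From userspace these sampling paths sit behind the POSIX CPU-time clocks; a minimal reader, assuming the usual mapping of CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID onto the sampling functions changed here, might be:

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* whole process: sampled via cpu_clock_sample_group() in the kernel */
	if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0)
		printf("process cpu time: %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);

	/* calling thread only: sampled via cpu_clock_sample() */
	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts) == 0)
		printf("thread cpu time:  %ld.%09ld s\n", (long)ts.tv_sec, ts.tv_nsec);

	return 0;
}

Older glibc needs -lrt to link clock_gettime().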
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index aaad0ec34194..64191fa09b7e 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -21,9 +21,7 @@
21#include <linux/audit.h> 21#include <linux/audit.h>
22#include <linux/pid_namespace.h> 22#include <linux/pid_namespace.h>
23#include <linux/syscalls.h> 23#include <linux/syscalls.h>
24 24#include <linux/uaccess.h>
25#include <asm/pgtable.h>
26#include <asm/uaccess.h>
27 25
28 26
29/* 27/*
@@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent)
48 list_add(&child->ptrace_entry, &new_parent->ptraced); 46 list_add(&child->ptrace_entry, &new_parent->ptraced);
49 child->parent = new_parent; 47 child->parent = new_parent;
50} 48}
51 49
52/* 50/*
53 * Turn a tracing stop into a normal stop now, since with no tracer there 51 * Turn a tracing stop into a normal stop now, since with no tracer there
54 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a 52 * would be no way to wake it up with SIGCONT or SIGKILL. If there was a
@@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode)
173 task_lock(task); 171 task_lock(task);
174 err = __ptrace_may_access(task, mode); 172 err = __ptrace_may_access(task, mode);
175 task_unlock(task); 173 task_unlock(task);
176 return (!err ? true : false); 174 return !err;
177} 175}
178 176
179int ptrace_attach(struct task_struct *task) 177int ptrace_attach(struct task_struct *task)
@@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst
358 copied += retval; 356 copied += retval;
359 src += retval; 357 src += retval;
360 dst += retval; 358 dst += retval;
361 len -= retval; 359 len -= retval;
362 } 360 }
363 return copied; 361 return copied;
364} 362}
@@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds
383 copied += retval; 381 copied += retval;
384 src += retval; 382 src += retval;
385 dst += retval; 383 dst += retval;
386 len -= retval; 384 len -= retval;
387 } 385 }
388 return copied; 386 return copied;
389} 387}
@@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data)
496 if (unlikely(!arch_has_single_step())) 494 if (unlikely(!arch_has_single_step()))
497 return -EIO; 495 return -EIO;
498 user_enable_single_step(child); 496 user_enable_single_step(child);
499 } 497 } else {
500 else
501 user_disable_single_step(child); 498 user_disable_single_step(child);
499 }
502 500
503 child->exit_code = data; 501 child->exit_code = data;
504 wake_up_process(child); 502 wake_up_process(child);
diff --git a/kernel/sched.c b/kernel/sched.c
index 6cc1fd5d5072..5724508c3b66 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
1418 struct rq_iterator *iterator); 1418 struct rq_iterator *iterator);
1419#endif 1419#endif
1420 1420
1421/* Time spent by the tasks of the cpu accounting group executing in ... */
1422enum cpuacct_stat_index {
1423 CPUACCT_STAT_USER, /* ... user mode */
1424 CPUACCT_STAT_SYSTEM, /* ... kernel mode */
1425
1426 CPUACCT_STAT_NSTATS,
1427};
1428
1421#ifdef CONFIG_CGROUP_CPUACCT 1429#ifdef CONFIG_CGROUP_CPUACCT
1422static void cpuacct_charge(struct task_struct *tsk, u64 cputime); 1430static void cpuacct_charge(struct task_struct *tsk, u64 cputime);
1431static void cpuacct_update_stats(struct task_struct *tsk,
1432 enum cpuacct_stat_index idx, cputime_t val);
1423#else 1433#else
1424static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} 1434static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {}
1435static inline void cpuacct_update_stats(struct task_struct *tsk,
1436 enum cpuacct_stat_index idx, cputime_t val) {}
1425#endif 1437#endif
1426 1438
1427static inline void inc_cpu_load(struct rq *rq, unsigned long load) 1439static inline void inc_cpu_load(struct rq *rq, unsigned long load)
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat);
4511EXPORT_PER_CPU_SYMBOL(kstat); 4523EXPORT_PER_CPU_SYMBOL(kstat);
4512 4524
4513/* 4525/*
4514 * Return any ns on the sched_clock that have not yet been banked in 4526 * Return any ns on the sched_clock that have not yet been accounted in
4515 * @p in case that task is currently running. 4527 * @p in case that task is currently running.
4528 *
4529 * Called with task_rq_lock() held on @rq.
4516 */ 4530 */
4531static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
4532{
4533 u64 ns = 0;
4534
4535 if (task_current(rq, p)) {
4536 update_rq_clock(rq);
4537 ns = rq->clock - p->se.exec_start;
4538 if ((s64)ns < 0)
4539 ns = 0;
4540 }
4541
4542 return ns;
4543}
4544
4517unsigned long long task_delta_exec(struct task_struct *p) 4545unsigned long long task_delta_exec(struct task_struct *p)
4518{ 4546{
4519 unsigned long flags; 4547 unsigned long flags;
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p)
4521 u64 ns = 0; 4549 u64 ns = 0;
4522 4550
4523 rq = task_rq_lock(p, &flags); 4551 rq = task_rq_lock(p, &flags);
4552 ns = do_task_delta_exec(p, rq);
4553 task_rq_unlock(rq, &flags);
4524 4554
4525 if (task_current(rq, p)) { 4555 return ns;
4526 u64 delta_exec; 4556}
4527 4557
4528 update_rq_clock(rq); 4558/*
4529 delta_exec = rq->clock - p->se.exec_start; 4559 * Return accounted runtime for the task.
4530 if ((s64)delta_exec > 0) 4560 * In case the task is currently running, return the runtime plus current's
4531 ns = delta_exec; 4561 * pending runtime that have not been accounted yet.
4532 } 4562 */
4563unsigned long long task_sched_runtime(struct task_struct *p)
4564{
4565 unsigned long flags;
4566 struct rq *rq;
4567 u64 ns = 0;
4568
4569 rq = task_rq_lock(p, &flags);
4570 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
4571 task_rq_unlock(rq, &flags);
4572
4573 return ns;
4574}
4575
4576/*
4577 * Return sum_exec_runtime for the thread group.
4578 * In case the task is currently running, return the sum plus current's
4579 * pending runtime that has not been accounted yet.
4580 *
4581 * Note that the thread group might have other running tasks as well,
4582 * so the return value does not include other pending runtime that other
4583 * running tasks might have.
4584 */
4585unsigned long long thread_group_sched_runtime(struct task_struct *p)
4586{
4587 struct task_cputime totals;
4588 unsigned long flags;
4589 struct rq *rq;
4590 u64 ns;
4533 4591
4592 rq = task_rq_lock(p, &flags);
4593 thread_group_cputime(p, &totals);
4594 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
4534 task_rq_unlock(rq, &flags); 4595 task_rq_unlock(rq, &flags);
4535 4596
4536 return ns; 4597 return ns;
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
4559 cpustat->nice = cputime64_add(cpustat->nice, tmp); 4620 cpustat->nice = cputime64_add(cpustat->nice, tmp);
4560 else 4621 else
4561 cpustat->user = cputime64_add(cpustat->user, tmp); 4622 cpustat->user = cputime64_add(cpustat->user, tmp);
4623
4624 cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
4562 /* Account for user time used */ 4625 /* Account for user time used */
4563 acct_update_integrals(p); 4626 acct_update_integrals(p);
4564} 4627}
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
4620 else 4683 else
4621 cpustat->system = cputime64_add(cpustat->system, tmp); 4684 cpustat->system = cputime64_add(cpustat->system, tmp);
4622 4685
4686 cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime);
4687
4623 /* Account for system time used */ 4688 /* Account for system time used */
4624 acct_update_integrals(p); 4689 acct_update_integrals(p);
4625} 4690}
@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
7302 cpumask_or(groupmask, groupmask, sched_group_cpus(group)); 7367 cpumask_or(groupmask, groupmask, sched_group_cpus(group));
7303 7368
7304 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); 7369 cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group));
7305 printk(KERN_CONT " %s", str); 7370 printk(KERN_CONT " %s (__cpu_power = %d)", str,
7371 group->__cpu_power);
7306 7372
7307 group = group->next; 7373 group = group->next;
7308 } while (group != sd->groups); 7374 } while (group != sd->groups);
@@ -9925,6 +9991,7 @@ struct cpuacct {
9925 struct cgroup_subsys_state css; 9991 struct cgroup_subsys_state css;
9926 /* cpuusage holds pointer to a u64-type object on every cpu */ 9992 /* cpuusage holds pointer to a u64-type object on every cpu */
9927 u64 *cpuusage; 9993 u64 *cpuusage;
9994 struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
9928 struct cpuacct *parent; 9995 struct cpuacct *parent;
9929}; 9996};
9930 9997
@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create(
9949 struct cgroup_subsys *ss, struct cgroup *cgrp) 10016 struct cgroup_subsys *ss, struct cgroup *cgrp)
9950{ 10017{
9951 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); 10018 struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
10019 int i;
9952 10020
9953 if (!ca) 10021 if (!ca)
9954 return ERR_PTR(-ENOMEM); 10022 goto out;
9955 10023
9956 ca->cpuusage = alloc_percpu(u64); 10024 ca->cpuusage = alloc_percpu(u64);
9957 if (!ca->cpuusage) { 10025 if (!ca->cpuusage)
9958 kfree(ca); 10026 goto out_free_ca;
9959 return ERR_PTR(-ENOMEM); 10027
9960 } 10028 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10029 if (percpu_counter_init(&ca->cpustat[i], 0))
10030 goto out_free_counters;
9961 10031
9962 if (cgrp->parent) 10032 if (cgrp->parent)
9963 ca->parent = cgroup_ca(cgrp->parent); 10033 ca->parent = cgroup_ca(cgrp->parent);
9964 10034
9965 return &ca->css; 10035 return &ca->css;
10036
10037out_free_counters:
10038 while (--i >= 0)
10039 percpu_counter_destroy(&ca->cpustat[i]);
10040 free_percpu(ca->cpuusage);
10041out_free_ca:
10042 kfree(ca);
10043out:
10044 return ERR_PTR(-ENOMEM);
9966} 10045}
9967 10046
9968/* destroy an existing cpu accounting group */ 10047/* destroy an existing cpu accounting group */
@@ -9970,7 +10049,10 @@ static void
9970cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) 10049cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
9971{ 10050{
9972 struct cpuacct *ca = cgroup_ca(cgrp); 10051 struct cpuacct *ca = cgroup_ca(cgrp);
10052 int i;
9973 10053
10054 for (i = 0; i < CPUACCT_STAT_NSTATS; i++)
10055 percpu_counter_destroy(&ca->cpustat[i]);
9974 free_percpu(ca->cpuusage); 10056 free_percpu(ca->cpuusage);
9975 kfree(ca); 10057 kfree(ca);
9976} 10058}
@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft,
10057 return 0; 10139 return 0;
10058} 10140}
10059 10141
10142static const char *cpuacct_stat_desc[] = {
10143 [CPUACCT_STAT_USER] = "user",
10144 [CPUACCT_STAT_SYSTEM] = "system",
10145};
10146
10147static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
10148 struct cgroup_map_cb *cb)
10149{
10150 struct cpuacct *ca = cgroup_ca(cgrp);
10151 int i;
10152
10153 for (i = 0; i < CPUACCT_STAT_NSTATS; i++) {
10154 s64 val = percpu_counter_read(&ca->cpustat[i]);
10155 val = cputime64_to_clock_t(val);
10156 cb->fill(cb, cpuacct_stat_desc[i], val);
10157 }
10158 return 0;
10159}
10160
10060static struct cftype files[] = { 10161static struct cftype files[] = {
10061 { 10162 {
10062 .name = "usage", 10163 .name = "usage",
@@ -10067,7 +10168,10 @@ static struct cftype files[] = {
10067 .name = "usage_percpu", 10168 .name = "usage_percpu",
10068 .read_seq_string = cpuacct_percpu_seq_read, 10169 .read_seq_string = cpuacct_percpu_seq_read,
10069 }, 10170 },
10070 10171 {
10172 .name = "stat",
10173 .read_map = cpuacct_stats_show,
10174 },
10071}; 10175};
10072 10176
10073static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 10177static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
10089 return; 10193 return;
10090 10194
10091 cpu = task_cpu(tsk); 10195 cpu = task_cpu(tsk);
10196
10197 rcu_read_lock();
10198
10092 ca = task_ca(tsk); 10199 ca = task_ca(tsk);
10093 10200
10094 for (; ca; ca = ca->parent) { 10201 for (; ca; ca = ca->parent) {
10095 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); 10202 u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
10096 *cpuusage += cputime; 10203 *cpuusage += cputime;
10097 } 10204 }
10205
10206 rcu_read_unlock();
10207}
10208
10209/*
10210 * Charge the system/user time to the task's accounting group.
10211 */
10212static void cpuacct_update_stats(struct task_struct *tsk,
10213 enum cpuacct_stat_index idx, cputime_t val)
10214{
10215 struct cpuacct *ca;
10216
10217 if (unlikely(!cpuacct_subsys.active))
10218 return;
10219
10220 rcu_read_lock();
10221 ca = task_ca(tsk);
10222
10223 do {
10224 percpu_counter_add(&ca->cpustat[idx], val);
10225 ca = ca->parent;
10226 } while (ca);
10227 rcu_read_unlock();
10098} 10228}
10099 10229
10100struct cgroup_subsys cpuacct_subsys = { 10230struct cgroup_subsys cpuacct_subsys = {
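
The cpuacct hunks above accumulate per-cgroup user and system time into percpu_counters, walking up the cgroup hierarchy from cpuacct_update_stats() on every accounting call and exposing the totals through a new cpuacct.stat file. The counter lifecycle, distilled from those hunks into a standalone sketch (the struct and function names here are made up), is:

#include <linux/percpu_counter.h>
#include <linux/errno.h>

struct my_stats {
	struct percpu_counter stat[2];		/* [0] = user, [1] = system */
};

static int my_stats_init(struct my_stats *s)
{
	int i;

	for (i = 0; i < 2; i++) {
		if (percpu_counter_init(&s->stat[i], 0)) {
			while (--i >= 0)	/* unwind the counters that succeeded */
				percpu_counter_destroy(&s->stat[i]);
			return -ENOMEM;
		}
	}
	return 0;
}

static void my_stats_account(struct my_stats *s, int idx, s64 delta)
{
	percpu_counter_add(&s->stat[idx], delta);	/* cheap, per-cpu fast path */
}

static s64 my_stats_read(struct my_stats *s, int idx)
{
	return percpu_counter_read(&s->stat[idx]);	/* approximate, lock-free read */
}

The diff reads the counters back the same way and converts the cputime values with cputime64_to_clock_t() before reporting them via the cgroup map callback in cpuacct_stats_show().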
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 1e00bfacf9b8..cdd3c89574cd 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -55,7 +55,7 @@ static int convert_prio(int prio)
55 * cpupri_find - find the best (lowest-pri) CPU in the system 55 * cpupri_find - find the best (lowest-pri) CPU in the system
56 * @cp: The cpupri context 56 * @cp: The cpupri context
57 * @p: The task 57 * @p: The task
58 * @lowest_mask: A mask to fill in with selected CPUs 58 * @lowest_mask: A mask to fill in with selected CPUs (or NULL)
59 * 59 *
60 * Note: This function returns the recommended CPUs as calculated during the 60 * Note: This function returns the recommended CPUs as calculated during the
61 * current invocation. By the time the call returns, the CPUs may have in 61 * current invocation. By the time the call returns, the CPUs may have in
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) 81 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
82 continue; 82 continue;
83 83
84 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); 84 if (lowest_mask)
85 cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask);
85 return 1; 86 return 1;
86 } 87 }
87 88
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 299d012b4394..f2c66f8f9712 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
948 948
949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 949static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
950{ 950{
951 cpumask_var_t mask;
952
953 if (rq->curr->rt.nr_cpus_allowed == 1) 951 if (rq->curr->rt.nr_cpus_allowed == 1)
954 return; 952 return;
955 953
956 if (!alloc_cpumask_var(&mask, GFP_ATOMIC))
957 return;
958
959 if (p->rt.nr_cpus_allowed != 1 954 if (p->rt.nr_cpus_allowed != 1
960 && cpupri_find(&rq->rd->cpupri, p, mask)) 955 && cpupri_find(&rq->rd->cpupri, p, NULL))
961 goto free; 956 return;
962 957
963 if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) 958 if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
964 goto free; 959 return;
965 960
966 /* 961 /*
967 * There appears to be other cpus that can accept 962 * There appears to be other cpus that can accept
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
970 */ 965 */
971 requeue_task_rt(rq, p, 1); 966 requeue_task_rt(rq, p, 1);
972 resched_task(rq->curr); 967 resched_task(rq->curr);
973free:
974 free_cpumask_var(mask);
975} 968}
976 969
977#endif /* CONFIG_SMP */ 970#endif /* CONFIG_SMP */
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
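
All of the hung-task detection removed above leaves softlockup.c; it now lives in its own detector behind CONFIG_DETECT_HUNG_TASK (see the sysctl entries further down), rather than piggy-backing on one CPU of the softlockup watchdog. The heuristic the removed code implemented, distilled into a standalone sketch (the function name is illustrative, and the last_switch_count/last_switch_timestamp fields only exist when the hung-task detector is configured in):

#include <linux/sched.h>

/*
 * Core of the removed check_hung_task(): a TASK_UNINTERRUPTIBLE task that
 * has not context-switched, voluntarily or not, for the whole timeout
 * window is reported as hung.
 */
static bool task_looks_hung(struct task_struct *t, unsigned long now,
			    unsigned long timeout_secs)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	if (t->flags & PF_FROZEN)		/* frozen tasks are expected to be stuck */
		return false;

	if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
		t->last_switch_count = switch_count;	/* it did run; rearm the window */
		t->last_switch_timestamp = now;
		return false;
	}

	return (long)(now - t->last_switch_timestamp) >= (long)timeout_secs;
}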
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b125e3387568..4286b62b34a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1;
101static int __maybe_unused two = 2; 101static int __maybe_unused two = 2;
102static unsigned long one_ul = 1; 102static unsigned long one_ul = 1;
103static int one_hundred = 100; 103static int one_hundred = 100;
104static int one_thousand = 1000;
104 105
105/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
106static int maxolduid = 65535; 107static int maxolduid = 65535;
@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = {
813 .extra1 = &neg_one, 814 .extra1 = &neg_one,
814 .extra2 = &sixty, 815 .extra2 = &sixty,
815 }, 816 },
817#endif
818#ifdef CONFIG_DETECT_HUNG_TASK
819 {
820 .ctl_name = CTL_UNNUMBERED,
821 .procname = "hung_task_panic",
822 .data = &sysctl_hung_task_panic,
823 .maxlen = sizeof(int),
824 .mode = 0644,
825 .proc_handler = &proc_dointvec_minmax,
826 .strategy = &sysctl_intvec,
827 .extra1 = &zero,
828 .extra2 = &one,
829 },
816 { 830 {
817 .ctl_name = CTL_UNNUMBERED, 831 .ctl_name = CTL_UNNUMBERED,
818 .procname = "hung_task_check_count", 832 .procname = "hung_task_check_count",
@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = {
828 .data = &sysctl_hung_task_timeout_secs, 842 .data = &sysctl_hung_task_timeout_secs,
829 .maxlen = sizeof(unsigned long), 843 .maxlen = sizeof(unsigned long),
830 .mode = 0644, 844 .mode = 0644,
831 .proc_handler = &proc_doulongvec_minmax, 845 .proc_handler = &proc_dohung_task_timeout_secs,
832 .strategy = &sysctl_intvec, 846 .strategy = &sysctl_intvec,
833 }, 847 },
834 { 848 {
@@ -1027,6 +1041,28 @@ static struct ctl_table vm_table[] = {
1027 .proc_handler = &proc_dointvec, 1041 .proc_handler = &proc_dointvec,
1028 }, 1042 },
1029 { 1043 {
1044 .ctl_name = CTL_UNNUMBERED,
1045 .procname = "nr_pdflush_threads_min",
1046 .data = &nr_pdflush_threads_min,
1047 .maxlen = sizeof nr_pdflush_threads_min,
1048 .mode = 0644 /* read-write */,
1049 .proc_handler = &proc_dointvec_minmax,
1050 .strategy = &sysctl_intvec,
1051 .extra1 = &one,
1052 .extra2 = &nr_pdflush_threads_max,
1053 },
1054 {
1055 .ctl_name = CTL_UNNUMBERED,
1056 .procname = "nr_pdflush_threads_max",
1057 .data = &nr_pdflush_threads_max,
1058 .maxlen = sizeof nr_pdflush_threads_max,
1059 .mode = 0644 /* read-write */,
1060 .proc_handler = &proc_dointvec_minmax,
1061 .strategy = &sysctl_intvec,
1062 .extra1 = &nr_pdflush_threads_min,
1063 .extra2 = &one_thousand,
1064 },
1065 {
1030 .ctl_name = VM_SWAPPINESS, 1066 .ctl_name = VM_SWAPPINESS,
1031 .procname = "swappiness", 1067 .procname = "swappiness",
1032 .data = &vm_swappiness, 1068 .data = &vm_swappiness,

diff --git a/kernel/timer.c b/kernel/timer.c
index b4555568b4e4..cffffad01c31 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer,
531} 531}
532 532
533/** 533/**
534 * init_timer - initialize a timer. 534 * init_timer_key - initialize a timer
535 * @timer: the timer to be initialized 535 * @timer: the timer to be initialized
536 * @name: name of the timer
537 * @key: lockdep class key of the fake lock used for tracking timer
538 * sync lock dependencies
536 * 539 *
537 * init_timer() must be done to a timer prior to calling *any* of the 540 * init_timer_key() must be done to a timer prior to calling *any* of the
538 * other timer functions. 541 * other timer functions.
539 */ 542 */
540void init_timer_key(struct timer_list *timer, 543void init_timer_key(struct timer_list *timer,
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index b32ff446c3fb..921ef5d1f0ba 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str)
1377{ 1377{
1378 int i; 1378 int i;
1379 int mask = 0; 1379 int mask = 0;
1380 char *s, *token; 1380 char *buf, *s, *token;
1381 1381
1382 s = kstrdup(str, GFP_KERNEL); 1382 buf = kstrdup(str, GFP_KERNEL);
1383 if (s == NULL) 1383 if (buf == NULL)
1384 return -ENOMEM; 1384 return -ENOMEM;
1385 s = strstrip(s); 1385 s = strstrip(buf);
1386 1386
1387 while (1) { 1387 while (1) {
1388 token = strsep(&s, ","); 1388 token = strsep(&s, ",");
@@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str)
1403 break; 1403 break;
1404 } 1404 }
1405 } 1405 }
1406 kfree(s); 1406 kfree(buf);
1407 1407
1408 return mask; 1408 return mask;
1409} 1409}
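
The blktrace fix is the classic strsep()/kfree() trap: strstrip() may return a pointer inside the allocation and strsep() advances its cursor (eventually to NULL), so the original kstrdup() pointer has to be kept separately and be the one passed to kfree(). In generic form (the function name is hypothetical):

#include <linux/string.h>
#include <linux/slab.h>
#include <linux/errno.h>

static int count_tokens(const char *str)
{
	char *buf, *s, *token;
	int n = 0;

	buf = kstrdup(str, GFP_KERNEL);		/* keep this pointer for kfree() */
	if (!buf)
		return -ENOMEM;

	s = strstrip(buf);			/* may point past leading whitespace */
	while ((token = strsep(&s, ",")) != NULL) {	/* strsep() moves s, finally to NULL */
		if (*token)
			n++;
	}

	kfree(buf);				/* not s, not token */
	return n;
}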
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0615751a3ed7..4865459f609f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3268,19 +3268,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp)
3268 3268
3269 info->tr = &global_trace; 3269 info->tr = &global_trace;
3270 info->cpu = cpu; 3270 info->cpu = cpu;
3271 info->spare = ring_buffer_alloc_read_page(info->tr->buffer); 3271 info->spare = NULL;
3272 /* Force reading ring buffer for first read */ 3272 /* Force reading ring buffer for first read */
3273 info->read = (unsigned int)-1; 3273 info->read = (unsigned int)-1;
3274 if (!info->spare)
3275 goto out;
3276 3274
3277 filp->private_data = info; 3275 filp->private_data = info;
3278 3276
3279 return 0; 3277 return nonseekable_open(inode, filp);
3280
3281 out:
3282 kfree(info);
3283 return -ENOMEM;
3284} 3278}
3285 3279
3286static ssize_t 3280static ssize_t
@@ -3295,6 +3289,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
3295 if (!count) 3289 if (!count)
3296 return 0; 3290 return 0;
3297 3291
3292 if (!info->spare)
3293 info->spare = ring_buffer_alloc_read_page(info->tr->buffer);
3294 if (!info->spare)
3295 return -ENOMEM;
3296
3298 /* Do we have previous read data to read? */ 3297 /* Do we have previous read data to read? */
3299 if (info->read < PAGE_SIZE) 3298 if (info->read < PAGE_SIZE)
3300 goto read; 3299 goto read;
@@ -3333,7 +3332,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file)
3333{ 3332{
3334 struct ftrace_buffer_info *info = file->private_data; 3333 struct ftrace_buffer_info *info = file->private_data;
3335 3334
3336 ring_buffer_free_read_page(info->tr->buffer, info->spare); 3335 if (info->spare)
3336 ring_buffer_free_read_page(info->tr->buffer, info->spare);
3337 kfree(info); 3337 kfree(info);
3338 3338
3339 return 0; 3339 return 0;
@@ -3419,14 +3419,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3419 int size, i; 3419 int size, i;
3420 size_t ret; 3420 size_t ret;
3421 3421
3422 /* 3422 if (*ppos & (PAGE_SIZE - 1)) {
3423 * We can't seek on a buffer input 3423 WARN_ONCE(1, "Ftrace: previous read must page-align\n");
3424 */ 3424 return -EINVAL;
3425 if (unlikely(*ppos)) 3425 }
3426 return -ESPIPE;
3427 3426
3427 if (len & (PAGE_SIZE - 1)) {
3428 WARN_ONCE(1, "Ftrace: splice_read should page-align\n");
3429 if (len < PAGE_SIZE)
3430 return -EINVAL;
3431 len &= PAGE_MASK;
3432 }
3428 3433
3429 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { 3434 for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
3430 struct page *page; 3435 struct page *page;
3431 int r; 3436 int r;
3432 3437
@@ -3465,6 +3470,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
3465 spd.partial[i].offset = 0; 3470 spd.partial[i].offset = 0;
3466 spd.partial[i].private = (unsigned long)ref; 3471 spd.partial[i].private = (unsigned long)ref;
3467 spd.nr_pages++; 3472 spd.nr_pages++;
3473 *ppos += PAGE_SIZE;
3468 } 3474 }
3469 3475
3470 spd.nr_pages = i; 3476 spd.nr_pages = i;
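
In the trace.c hunks, the spare read page is now allocated lazily on first read rather than at open time, and tracing_buffers_splice_read() insists on page-aligned offsets and lengths and advances *ppos itself by one page per spliced page. These are the fops behind the per-cpu trace_pipe_raw files; a small userspace sketch (the debugfs mount point is an assumption on my part) that splices one page of raw ring-buffer data into a pipe:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define BUF_PATH "/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe_raw"

int main(void)
{
	int fd = open(BUF_PATH, O_RDONLY);
	int pfd[2];
	ssize_t n;

	if (fd < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	/* the length must be page-aligned; the kernel advances the offset */
	n = splice(fd, NULL, pfd[1], NULL, 4096, 0);
	if (n < 0)
		perror("splice");
	else
		printf("spliced %zd bytes of raw ring-buffer data\n", n);

	close(pfd[0]);
	close(pfd[1]);
	close(fd);
	return 0;
}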
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index a2a3af29c943..5e579645ac86 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -1,5 +1,5 @@
1#include <trace/syscall.h>
1#include <linux/kernel.h> 2#include <linux/kernel.h>
2#include <linux/ftrace.h>
3#include <asm/syscall.h> 3#include <asm/syscall.h>
4 4
5#include "trace_output.h" 5#include "trace_output.h"
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index b6b966ce1451..f71fb2a08950 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -966,20 +966,20 @@ undo:
966} 966}
967 967
968#ifdef CONFIG_SMP 968#ifdef CONFIG_SMP
969static struct workqueue_struct *work_on_cpu_wq __read_mostly;
970 969
971struct work_for_cpu { 970struct work_for_cpu {
972 struct work_struct work; 971 struct completion completion;
973 long (*fn)(void *); 972 long (*fn)(void *);
974 void *arg; 973 void *arg;
975 long ret; 974 long ret;
976}; 975};
977 976
978static void do_work_for_cpu(struct work_struct *w) 977static int do_work_for_cpu(void *_wfc)
979{ 978{
980 struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); 979 struct work_for_cpu *wfc = _wfc;
981
982 wfc->ret = wfc->fn(wfc->arg); 980 wfc->ret = wfc->fn(wfc->arg);
981 complete(&wfc->completion);
982 return 0;
983} 983}
984 984
985/** 985/**
@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w)
990 * 990 *
991 * This will return the value @fn returns. 991 * This will return the value @fn returns.
992 * It is up to the caller to ensure that the cpu doesn't go offline. 992 * It is up to the caller to ensure that the cpu doesn't go offline.
993 * The caller must not hold any locks which would prevent @fn from completing.
993 */ 994 */
994long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 995long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
995{ 996{
996 struct work_for_cpu wfc; 997 struct task_struct *sub_thread;
997 998 struct work_for_cpu wfc = {
998 INIT_WORK(&wfc.work, do_work_for_cpu); 999 .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
999 wfc.fn = fn; 1000 .fn = fn,
1000 wfc.arg = arg; 1001 .arg = arg,
1001 queue_work_on(cpu, work_on_cpu_wq, &wfc.work); 1002 };
1002 flush_work(&wfc.work); 1003
1003 1004 sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
1005 if (IS_ERR(sub_thread))
1006 return PTR_ERR(sub_thread);
1007 kthread_bind(sub_thread, cpu);
1008 wake_up_process(sub_thread);
1009 wait_for_completion(&wfc.completion);
1004 return wfc.ret; 1010 return wfc.ret;
1005} 1011}
1006EXPORT_SYMBOL_GPL(work_on_cpu); 1012EXPORT_SYMBOL_GPL(work_on_cpu);
@@ -1016,8 +1022,4 @@ void __init init_workqueues(void)
1016 hotcpu_notifier(workqueue_cpu_callback, 0); 1022 hotcpu_notifier(workqueue_cpu_callback, 0);
1017 keventd_wq = create_workqueue("events"); 1023 keventd_wq = create_workqueue("events");
1018 BUG_ON(!keventd_wq); 1024 BUG_ON(!keventd_wq);
1019#ifdef CONFIG_SMP
1020 work_on_cpu_wq = create_workqueue("work_on_cpu");
1021 BUG_ON(!work_on_cpu_wq);
1022#endif
1023} 1025}
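
Finally, work_on_cpu() no longer has a private workqueue: it creates a throwaway kernel thread, binds it to the target CPU, and waits on an on-stack completion, hence the new kerneldoc warning that the caller must not hold locks that would prevent @fn from completing. A minimal, hypothetical caller, which per the existing comment is also responsible for keeping the CPU online:

#include <linux/workqueue.h>
#include <linux/cpu.h>

/* Hypothetical callback; it runs in a helper thread bound to the requested CPU. */
static long read_per_cpu_state(void *arg)
{
	return 0;	/* placeholder result */
}

static long query_cpu(unsigned int cpu)
{
	long ret;

	get_online_cpus();			/* keep the target cpu from going away */
	ret = work_on_cpu(cpu, read_per_cpu_state, NULL);
	put_online_cpus();
	return ret;				/* value returned by the callback */
}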