path: root/kernel
author     Ingo Molnar <mingo@elte.hu>    2009-04-08 04:35:30 -0400
committer  Ingo Molnar <mingo@elte.hu>    2009-04-08 04:35:30 -0400
commit     5ea472a77f8e4811ceee3f44a9deda6ad6e8b789 (patch)
tree       a9ec5019e2b666a19874fc344ffb0dd5da6bce94 /kernel
parent     6c009ecef8cca28c7c09eb16d0802e37915a76e1 (diff)
parent     577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff)
Merge commit 'v2.6.30-rc1' into perfcounters/core
Conflicts:
	arch/powerpc/include/asm/systbl.h
	arch/powerpc/include/asm/unistd.h
	include/linux/init_task.h

Merge reason: the conflicts are non-trivial: PowerPC placement of
sys_perf_counter_open has to be mixed with the new preadv/pwritev
syscalls.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile                    |    1
-rw-r--r--  kernel/exit.c                      |    2
-rw-r--r--  kernel/fork.c                      |    8
-rw-r--r--  kernel/hung_task.c                 |  217
-rw-r--r--  kernel/irq/devres.c                |   16
-rw-r--r--  kernel/irq/handle.c                |   50
-rw-r--r--  kernel/irq/manage.c                |  189
-rw-r--r--  kernel/kprobes.c                   |  281
-rw-r--r--  kernel/module.c                    |    3
-rw-r--r--  kernel/softlockup.c                |  100
-rw-r--r--  kernel/sysctl.c                    |   38
-rw-r--r--  kernel/trace/blktrace.c            |    7
-rw-r--r--  kernel/trace/trace.c               |   21
-rw-r--r--  kernel/trace/trace.h               |    2
-rw-r--r--  kernel/trace/trace_export.c        |    2
-rw-r--r--  kernel/trace/trace_output.c        |    2
-rw-r--r--  kernel/trace/trace_sched_switch.c  |    3
-rw-r--r--  kernel/trace/trace_sched_wakeup.c  |    8
18 files changed, 729 insertions, 221 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 63c697529ca1..e914ca992d70 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
74obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
75obj-$(CONFIG_KGDB) += kgdb.o 75obj-$(CONFIG_KGDB) += kgdb.o
76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o 76obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
77obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
77obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ 78obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
78obj-$(CONFIG_SECCOMP) += seccomp.o 79obj-$(CONFIG_SECCOMP) += seccomp.o
79obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 80obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/exit.c b/kernel/exit.c
index fbb5d94c8bbc..4741376c8dec 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -926,6 +926,8 @@ NORET_TYPE void do_exit(long code)
926 schedule(); 926 schedule();
927 } 927 }
928 928
929 exit_irq_thread();
930
929 exit_signals(tsk); /* sets PF_EXITING */ 931 exit_signals(tsk); /* sets PF_EXITING */
930 /* 932 /*
931 * tsk->flags are checked in the futex code to protect against 933 * tsk->flags are checked in the futex code to protect against
diff --git a/kernel/fork.c b/kernel/fork.c
index 381d7f9b70fb..89c1efb3ccf4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
645 645
646 tsk->min_flt = tsk->maj_flt = 0; 646 tsk->min_flt = tsk->maj_flt = 0;
647 tsk->nvcsw = tsk->nivcsw = 0; 647 tsk->nvcsw = tsk->nivcsw = 0;
648#ifdef CONFIG_DETECT_HUNG_TASK
649 tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
650#endif
648 651
649 tsk->mm = NULL; 652 tsk->mm = NULL;
650 tsk->active_mm = NULL; 653 tsk->active_mm = NULL;
@@ -1033,11 +1036,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
1033 1036
1034 p->default_timer_slack_ns = current->timer_slack_ns; 1037 p->default_timer_slack_ns = current->timer_slack_ns;
1035 1038
1036#ifdef CONFIG_DETECT_SOFTLOCKUP
1037 p->last_switch_count = 0;
1038 p->last_switch_timestamp = 0;
1039#endif
1040
1041 task_io_accounting_init(&p->ioac); 1039 task_io_accounting_init(&p->ioac);
1042 acct_clear_integrals(p); 1040 acct_clear_integrals(p);
1043 1041
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
new file mode 100644
index 000000000000..022a4927b785
--- /dev/null
+++ b/kernel/hung_task.c
@@ -0,0 +1,217 @@
1/*
2 * Detect Hung Task
3 *
4 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
5 *
6 */
7
8#include <linux/mm.h>
9#include <linux/cpu.h>
10#include <linux/nmi.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/freezer.h>
14#include <linux/kthread.h>
15#include <linux/lockdep.h>
16#include <linux/module.h>
17#include <linux/sysctl.h>
18
19/*
20 * The number of tasks checked:
21 */
22unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
23
24/*
25 * Limit number of tasks checked in a batch.
26 *
27 * This value controls the preemptibility of khungtaskd since preemption
28 * is disabled during the critical section. It also controls the size of
29 * the RCU grace period. So it needs to be upper-bound.
30 */
31#define HUNG_TASK_BATCHING 1024
32
33/*
34 * Zero means infinite timeout - no checking done:
35 */
36unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120;
37
38unsigned long __read_mostly sysctl_hung_task_warnings = 10;
39
40static int __read_mostly did_panic;
41
42static struct task_struct *watchdog_task;
43
44/*
45 * Should we panic (and reboot, if panic_timeout= is set) when a
46 * hung task is detected:
47 */
48unsigned int __read_mostly sysctl_hung_task_panic =
49 CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
50
51static int __init hung_task_panic_setup(char *str)
52{
53 sysctl_hung_task_panic = simple_strtoul(str, NULL, 0);
54
55 return 1;
56}
57__setup("hung_task_panic=", hung_task_panic_setup);
58
59static int
60hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
61{
62 did_panic = 1;
63
64 return NOTIFY_DONE;
65}
66
67static struct notifier_block panic_block = {
68 .notifier_call = hung_task_panic,
69};
70
71static void check_hung_task(struct task_struct *t, unsigned long timeout)
72{
73 unsigned long switch_count = t->nvcsw + t->nivcsw;
74
75 /*
76 * Ensure the task is not frozen.
77 * Also, when a freshly created task is scheduled once, changes
78 * its state to TASK_UNINTERRUPTIBLE without having ever been
79 * switched out once, it musn't be checked.
80 */
81 if (unlikely(t->flags & PF_FROZEN || !switch_count))
82 return;
83
84 if (switch_count != t->last_switch_count) {
85 t->last_switch_count = switch_count;
86 return;
87 }
88 if (!sysctl_hung_task_warnings)
89 return;
90 sysctl_hung_task_warnings--;
91
92 /*
93 * Ok, the task did not get scheduled for more than 2 minutes,
94 * complain:
95 */
96 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
97 "%ld seconds.\n", t->comm, t->pid, timeout);
98 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
99 " disables this message.\n");
100 sched_show_task(t);
101 __debug_show_held_locks(t);
102
103 touch_nmi_watchdog();
104
105 if (sysctl_hung_task_panic)
106 panic("hung_task: blocked tasks");
107}
108
109/*
110 * To avoid extending the RCU grace period for an unbounded amount of time,
111 * periodically exit the critical section and enter a new one.
112 *
113 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
114 * exit the grace period. For classic RCU, a reschedule is required.
115 */
116static void rcu_lock_break(struct task_struct *g, struct task_struct *t)
117{
118 get_task_struct(g);
119 get_task_struct(t);
120 rcu_read_unlock();
121 cond_resched();
122 rcu_read_lock();
123 put_task_struct(t);
124 put_task_struct(g);
125}
126
127/*
128 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
129 * a really long time (120 seconds). If that happens, print out
130 * a warning.
131 */
132static void check_hung_uninterruptible_tasks(unsigned long timeout)
133{
134 int max_count = sysctl_hung_task_check_count;
135 int batch_count = HUNG_TASK_BATCHING;
136 struct task_struct *g, *t;
137
138 /*
139 * If the system crashed already then all bets are off,
140 * do not report extra hung tasks:
141 */
142 if (test_taint(TAINT_DIE) || did_panic)
143 return;
144
145 rcu_read_lock();
146 do_each_thread(g, t) {
147 if (!--max_count)
148 goto unlock;
149 if (!--batch_count) {
150 batch_count = HUNG_TASK_BATCHING;
151 rcu_lock_break(g, t);
152 /* Exit if t or g was unhashed during refresh. */
153 if (t->state == TASK_DEAD || g->state == TASK_DEAD)
154 goto unlock;
155 }
156 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
157 if (t->state == TASK_UNINTERRUPTIBLE)
158 check_hung_task(t, timeout);
159 } while_each_thread(g, t);
160 unlock:
161 rcu_read_unlock();
162}
163
164static unsigned long timeout_jiffies(unsigned long timeout)
165{
166 /* timeout of 0 will disable the watchdog */
167 return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
168}
169
170/*
171 * Process updating of timeout sysctl
172 */
173int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
174 struct file *filp, void __user *buffer,
175 size_t *lenp, loff_t *ppos)
176{
177 int ret;
178
179 ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos);
180
181 if (ret || !write)
182 goto out;
183
184 wake_up_process(watchdog_task);
185
186 out:
187 return ret;
188}
189
190/*
191 * kthread which checks for tasks stuck in D state
192 */
193static int watchdog(void *dummy)
194{
195 set_user_nice(current, 0);
196
197 for ( ; ; ) {
198 unsigned long timeout = sysctl_hung_task_timeout_secs;
199
200 while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
201 timeout = sysctl_hung_task_timeout_secs;
202
203 check_hung_uninterruptible_tasks(timeout);
204 }
205
206 return 0;
207}
208
209static int __init hung_task_init(void)
210{
211 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
212 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
213
214 return 0;
215}
216
217module_init(hung_task_init);
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c
index 38a25b8d8bff..d06df9c41cba 100644
--- a/kernel/irq/devres.c
+++ b/kernel/irq/devres.c
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
26} 26}
27 27
28/** 28/**
29 * devm_request_irq - allocate an interrupt line for a managed device 29 * devm_request_threaded_irq - allocate an interrupt line for a managed device
30 * @dev: device to request interrupt for 30 * @dev: device to request interrupt for
31 * @irq: Interrupt line to allocate 31 * @irq: Interrupt line to allocate
32 * @handler: Function to be called when the IRQ occurs 32 * @handler: Function to be called when the IRQ occurs
33 * @thread_fn: function to be called in a threaded interrupt context. NULL
34 * for devices which handle everything in @handler
33 * @irqflags: Interrupt type flags 35 * @irqflags: Interrupt type flags
34 * @devname: An ascii name for the claiming device 36 * @devname: An ascii name for the claiming device
35 * @dev_id: A cookie passed back to the handler function 37 * @dev_id: A cookie passed back to the handler function
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data)
42 * If an IRQ allocated with this function needs to be freed 44 * If an IRQ allocated with this function needs to be freed
43 * separately, dev_free_irq() must be used. 45 * separately, dev_free_irq() must be used.
44 */ 46 */
45int devm_request_irq(struct device *dev, unsigned int irq, 47int devm_request_threaded_irq(struct device *dev, unsigned int irq,
46 irq_handler_t handler, unsigned long irqflags, 48 irq_handler_t handler, irq_handler_t thread_fn,
47 const char *devname, void *dev_id) 49 unsigned long irqflags, const char *devname,
50 void *dev_id)
48{ 51{
49 struct irq_devres *dr; 52 struct irq_devres *dr;
50 int rc; 53 int rc;
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq,
54 if (!dr) 57 if (!dr)
55 return -ENOMEM; 58 return -ENOMEM;
56 59
57 rc = request_irq(irq, handler, irqflags, devname, dev_id); 60 rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname,
61 dev_id);
58 if (rc) { 62 if (rc) {
59 devres_free(dr); 63 devres_free(dr);
60 return rc; 64 return rc;
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq,
66 70
67 return 0; 71 return 0;
68} 72}
69EXPORT_SYMBOL(devm_request_irq); 73EXPORT_SYMBOL(devm_request_threaded_irq);
70 74
71/** 75/**
72 * devm_free_irq - free an interrupt 76 * devm_free_irq - free an interrupt
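
The devres hunk above converts the managed helper to the threaded model. A minimal sketch of how a driver might call it follows; foo_primary(), foo_thread() and foo_bind() are made-up names with stub bodies, not part of this patch (a fuller split-handler example appears after the kernel/irq/manage.c diff below).

#include <linux/device.h>
#include <linux/interrupt.h>

/* Stub primary handler: would check/quiesce the device in hard irq context. */
static irqreturn_t foo_primary(int irq, void *dev_id)
{
        return IRQ_WAKE_THREAD;         /* defer the real work to foo_thread() */
}

/* Stub threaded handler: runs in process context and may sleep. */
static irqreturn_t foo_thread(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int foo_bind(struct device *dev, unsigned int irq, void *drvdata)
{
        /*
         * The IRQ is tied to @dev's lifetime, so no explicit
         * devm_free_irq()/free_irq() is needed on the remove path.
         */
        return devm_request_threaded_irq(dev, irq, foo_primary, foo_thread,
                                         0, dev_name(dev), drvdata);
}
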
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 343acecae629..d82142be8dd2 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id)
339 return IRQ_NONE; 339 return IRQ_NONE;
340} 340}
341 341
342static void warn_no_thread(unsigned int irq, struct irqaction *action)
343{
344 if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags))
345 return;
346
347 printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD "
348 "but no thread function available.", irq, action->name);
349}
350
342DEFINE_TRACE(irq_handler_entry); 351DEFINE_TRACE(irq_handler_entry);
343DEFINE_TRACE(irq_handler_exit); 352DEFINE_TRACE(irq_handler_exit);
344 353
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
363 trace_irq_handler_entry(irq, action); 372 trace_irq_handler_entry(irq, action);
364 ret = action->handler(irq, action->dev_id); 373 ret = action->handler(irq, action->dev_id);
365 trace_irq_handler_exit(irq, action, ret); 374 trace_irq_handler_exit(irq, action, ret);
366 if (ret == IRQ_HANDLED) 375
376 switch (ret) {
377 case IRQ_WAKE_THREAD:
378 /*
379 * Set result to handled so the spurious check
380 * does not trigger.
381 */
382 ret = IRQ_HANDLED;
383
384 /*
385 * Catch drivers which return WAKE_THREAD but
386 * did not set up a thread function
387 */
388 if (unlikely(!action->thread_fn)) {
389 warn_no_thread(irq, action);
390 break;
391 }
392
393 /*
394 * Wake up the handler thread for this
395 * action. In case the thread crashed and was
396 * killed we just pretend that we handled the
397 * interrupt. The hardirq handler above has
398 * disabled the device interrupt, so no irq
399 * storm is lurking.
400 */
401 if (likely(!test_bit(IRQTF_DIED,
402 &action->thread_flags))) {
403 set_bit(IRQTF_RUNTHREAD, &action->thread_flags);
404 wake_up_process(action->thread);
405 }
406
407 /* Fall through to add to randomness */
408 case IRQ_HANDLED:
367 status |= action->flags; 409 status |= action->flags;
410 break;
411
412 default:
413 break;
414 }
415
368 retval |= ret; 416 retval |= ret;
369 action = action->next; 417 action = action->next;
370 } while (action); 418 } while (action);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 1516ab77355c..7e2e7dd4cd2f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -8,16 +8,15 @@
8 */ 8 */
9 9
10#include <linux/irq.h> 10#include <linux/irq.h>
11#include <linux/kthread.h>
11#include <linux/module.h> 12#include <linux/module.h>
12#include <linux/random.h> 13#include <linux/random.h>
13#include <linux/interrupt.h> 14#include <linux/interrupt.h>
14#include <linux/slab.h> 15#include <linux/slab.h>
16#include <linux/sched.h>
15 17
16#include "internals.h" 18#include "internals.h"
17 19
18#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
19cpumask_var_t irq_default_affinity;
20
21/** 20/**
22 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
23 * @irq: interrupt number to wait for 22 * @irq: interrupt number to wait for
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq)
53 52
54 /* Oops, that failed? */ 53 /* Oops, that failed? */
55 } while (status & IRQ_INPROGRESS); 54 } while (status & IRQ_INPROGRESS);
55
56 /*
57 * We made sure that no hardirq handler is running. Now verify
58 * that no threaded handlers are active.
59 */
60 wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active));
56} 61}
57EXPORT_SYMBOL(synchronize_irq); 62EXPORT_SYMBOL(synchronize_irq);
58 63
64#ifdef CONFIG_SMP
65cpumask_var_t irq_default_affinity;
66
59/** 67/**
60 * irq_can_set_affinity - Check if the affinity of a given irq can be set 68 * irq_can_set_affinity - Check if the affinity of a given irq can be set
61 * @irq: Interrupt to check 69 * @irq: Interrupt to check
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq)
72 return 1; 80 return 1;
73} 81}
74 82
83static void
84irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
85{
86 struct irqaction *action = desc->action;
87
88 while (action) {
89 if (action->thread)
90 set_cpus_allowed_ptr(action->thread, cpumask);
91 action = action->next;
92 }
93}
94
75/** 95/**
76 * irq_set_affinity - Set the irq affinity of a given irq 96 * irq_set_affinity - Set the irq affinity of a given irq
77 * @irq: Interrupt to set affinity 97 * @irq: Interrupt to set affinity
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
100 cpumask_copy(desc->affinity, cpumask); 120 cpumask_copy(desc->affinity, cpumask);
101 desc->chip->set_affinity(irq, cpumask); 121 desc->chip->set_affinity(irq, cpumask);
102#endif 122#endif
123 irq_set_thread_affinity(desc, cpumask);
103 desc->status |= IRQ_AFFINITY_SET; 124 desc->status |= IRQ_AFFINITY_SET;
104 spin_unlock_irqrestore(&desc->lock, flags); 125 spin_unlock_irqrestore(&desc->lock, flags);
105 return 0; 126 return 0;
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq)
150 171
151 spin_lock_irqsave(&desc->lock, flags); 172 spin_lock_irqsave(&desc->lock, flags);
152 ret = setup_affinity(irq, desc); 173 ret = setup_affinity(irq, desc);
174 if (!ret)
175 irq_set_thread_affinity(desc, desc->affinity);
153 spin_unlock_irqrestore(&desc->lock, flags); 176 spin_unlock_irqrestore(&desc->lock, flags);
154 177
155 return ret; 178 return ret;
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
401 return ret; 424 return ret;
402} 425}
403 426
427static int irq_wait_for_interrupt(struct irqaction *action)
428{
429 while (!kthread_should_stop()) {
430 set_current_state(TASK_INTERRUPTIBLE);
431
432 if (test_and_clear_bit(IRQTF_RUNTHREAD,
433 &action->thread_flags)) {
434 __set_current_state(TASK_RUNNING);
435 return 0;
436 }
437 schedule();
438 }
439 return -1;
440}
441
442/*
443 * Interrupt handler thread
444 */
445static int irq_thread(void *data)
446{
447 struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, };
448 struct irqaction *action = data;
449 struct irq_desc *desc = irq_to_desc(action->irq);
450 int wake;
451
452 sched_setscheduler(current, SCHED_FIFO, &param);
453 current->irqaction = action;
454
455 while (!irq_wait_for_interrupt(action)) {
456
457 atomic_inc(&desc->threads_active);
458
459 spin_lock_irq(&desc->lock);
460 if (unlikely(desc->status & IRQ_DISABLED)) {
461 /*
462 * CHECKME: We might need a dedicated
463 * IRQ_THREAD_PENDING flag here, which
464 * retriggers the thread in check_irq_resend()
465 * but AFAICT IRQ_PENDING should be fine as it
466 * retriggers the interrupt itself --- tglx
467 */
468 desc->status |= IRQ_PENDING;
469 spin_unlock_irq(&desc->lock);
470 } else {
471 spin_unlock_irq(&desc->lock);
472
473 action->thread_fn(action->irq, action->dev_id);
474 }
475
476 wake = atomic_dec_and_test(&desc->threads_active);
477
478 if (wake && waitqueue_active(&desc->wait_for_threads))
479 wake_up(&desc->wait_for_threads);
480 }
481
482 /*
483 * Clear irqaction. Otherwise exit_irq_thread() would make
484 * fuzz about an active irq thread going into nirvana.
485 */
486 current->irqaction = NULL;
487 return 0;
488}
489
490/*
491 * Called from do_exit()
492 */
493void exit_irq_thread(void)
494{
495 struct task_struct *tsk = current;
496
497 if (!tsk->irqaction)
498 return;
499
500 printk(KERN_ERR
501 "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
502 tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq);
503
504 /*
505 * Set the THREAD DIED flag to prevent further wakeups of the
506 * soon to be gone threaded handler.
507 */
508 set_bit(IRQTF_DIED, &tsk->irqaction->flags);
509}
510
404/* 511/*
405 * Internal function to register an irqaction - typically used to 512 * Internal function to register an irqaction - typically used to
406 * allocate special interrupts that are part of the architecture. 513 * allocate special interrupts that are part of the architecture.
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
437 } 544 }
438 545
439 /* 546 /*
547 * Threaded handler ?
548 */
549 if (new->thread_fn) {
550 struct task_struct *t;
551
552 t = kthread_create(irq_thread, new, "irq/%d-%s", irq,
553 new->name);
554 if (IS_ERR(t))
555 return PTR_ERR(t);
556 /*
557 * We keep the reference to the task struct even if
558 * the thread dies to avoid that the interrupt code
559 * references an already freed task_struct.
560 */
561 get_task_struct(t);
562 new->thread = t;
563 wake_up_process(t);
564 }
565
566 /*
440 * The following block of code has to be executed atomically 567 * The following block of code has to be executed atomically
441 */ 568 */
442 spin_lock_irqsave(&desc->lock, flags); 569 spin_lock_irqsave(&desc->lock, flags);
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
473 if (!shared) { 600 if (!shared) {
474 irq_chip_set_defaults(desc->chip); 601 irq_chip_set_defaults(desc->chip);
475 602
603 init_waitqueue_head(&desc->wait_for_threads);
604
476 /* Setup the type (level, edge polarity) if configured: */ 605 /* Setup the type (level, edge polarity) if configured: */
477 if (new->flags & IRQF_TRIGGER_MASK) { 606 if (new->flags & IRQF_TRIGGER_MASK) {
478 ret = __irq_set_trigger(desc, irq, 607 ret = __irq_set_trigger(desc, irq,
479 new->flags & IRQF_TRIGGER_MASK); 608 new->flags & IRQF_TRIGGER_MASK);
480 609
481 if (ret) { 610 if (ret)
482 spin_unlock_irqrestore(&desc->lock, flags); 611 goto out_thread;
483 return ret;
484 }
485 } else 612 } else
486 compat_irq_chip_set_default_handler(desc); 613 compat_irq_chip_set_default_handler(desc);
487#if defined(CONFIG_IRQ_PER_CPU) 614#if defined(CONFIG_IRQ_PER_CPU)
@@ -549,8 +676,19 @@ mismatch:
549 dump_stack(); 676 dump_stack();
550 } 677 }
551#endif 678#endif
679 ret = -EBUSY;
680
681out_thread:
552 spin_unlock_irqrestore(&desc->lock, flags); 682 spin_unlock_irqrestore(&desc->lock, flags);
553 return -EBUSY; 683 if (new->thread) {
684 struct task_struct *t = new->thread;
685
686 new->thread = NULL;
687 if (likely(!test_bit(IRQTF_DIED, &new->thread_flags)))
688 kthread_stop(t);
689 put_task_struct(t);
690 }
691 return ret;
554} 692}
555 693
556/** 694/**
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
576{ 714{
577 struct irq_desc *desc = irq_to_desc(irq); 715 struct irq_desc *desc = irq_to_desc(irq);
578 struct irqaction *action, **action_ptr; 716 struct irqaction *action, **action_ptr;
717 struct task_struct *irqthread;
579 unsigned long flags; 718 unsigned long flags;
580 719
581 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 720 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
622 else 761 else
623 desc->chip->disable(irq); 762 desc->chip->disable(irq);
624 } 763 }
764
765 irqthread = action->thread;
766 action->thread = NULL;
767
625 spin_unlock_irqrestore(&desc->lock, flags); 768 spin_unlock_irqrestore(&desc->lock, flags);
626 769
627 unregister_handler_proc(irq, action); 770 unregister_handler_proc(irq, action);
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
629 /* Make sure it's not being used on another CPU: */ 772 /* Make sure it's not being used on another CPU: */
630 synchronize_irq(irq); 773 synchronize_irq(irq);
631 774
775 if (irqthread) {
776 if (!test_bit(IRQTF_DIED, &action->thread_flags))
777 kthread_stop(irqthread);
778 put_task_struct(irqthread);
779 }
780
632#ifdef CONFIG_DEBUG_SHIRQ 781#ifdef CONFIG_DEBUG_SHIRQ
633 /* 782 /*
634 * It's a shared IRQ -- the driver ought to be prepared for an IRQ 783 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id)
681EXPORT_SYMBOL(free_irq); 830EXPORT_SYMBOL(free_irq);
682 831
683/** 832/**
684 * request_irq - allocate an interrupt line 833 * request_threaded_irq - allocate an interrupt line
685 * @irq: Interrupt line to allocate 834 * @irq: Interrupt line to allocate
686 * @handler: Function to be called when the IRQ occurs 835 * @handler: Function to be called when the IRQ occurs.
836 * Primary handler for threaded interrupts
837 * @thread_fn: Function called from the irq handler thread
838 * If NULL, no irq thread is created
687 * @irqflags: Interrupt type flags 839 * @irqflags: Interrupt type flags
688 * @devname: An ascii name for the claiming device 840 * @devname: An ascii name for the claiming device
689 * @dev_id: A cookie passed back to the handler function 841 * @dev_id: A cookie passed back to the handler function
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq);
695 * raises, you must take care both to initialise your hardware 847 * raises, you must take care both to initialise your hardware
696 * and to set up the interrupt handler in the right order. 848 * and to set up the interrupt handler in the right order.
697 * 849 *
850 * If you want to set up a threaded irq handler for your device
851 * then you need to supply @handler and @thread_fn. @handler ist
852 * still called in hard interrupt context and has to check
853 * whether the interrupt originates from the device. If yes it
854 * needs to disable the interrupt on the device and return
855 * IRQ_THREAD_WAKE which will wake up the handler thread and run
856 * @thread_fn. This split handler design is necessary to support
857 * shared interrupts.
858 *
698 * Dev_id must be globally unique. Normally the address of the 859 * Dev_id must be globally unique. Normally the address of the
699 * device data structure is used as the cookie. Since the handler 860 * device data structure is used as the cookie. Since the handler
700 * receives this value it makes sense to use it. 861 * receives this value it makes sense to use it.
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq);
710 * IRQF_TRIGGER_* Specify active edge(s) or level 871 * IRQF_TRIGGER_* Specify active edge(s) or level
711 * 872 *
712 */ 873 */
713int request_irq(unsigned int irq, irq_handler_t handler, 874int request_threaded_irq(unsigned int irq, irq_handler_t handler,
714 unsigned long irqflags, const char *devname, void *dev_id) 875 irq_handler_t thread_fn, unsigned long irqflags,
876 const char *devname, void *dev_id)
715{ 877{
716 struct irqaction *action; 878 struct irqaction *action;
717 struct irq_desc *desc; 879 struct irq_desc *desc;
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler,
759 return -ENOMEM; 921 return -ENOMEM;
760 922
761 action->handler = handler; 923 action->handler = handler;
924 action->thread_fn = thread_fn;
762 action->flags = irqflags; 925 action->flags = irqflags;
763 action->name = devname; 926 action->name = devname;
764 action->dev_id = dev_id; 927 action->dev_id = dev_id;
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler,
788#endif 951#endif
789 return retval; 952 return retval;
790} 953}
791EXPORT_SYMBOL(request_irq); 954EXPORT_SYMBOL(request_threaded_irq);
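
For reference, a hedged sketch of the split-handler model that the request_threaded_irq() kernel-doc above describes: the primary handler runs in hard interrupt context, checks whether the interrupt came from its device, quiesces it, and returns IRQ_WAKE_THREAD (the constant actually handled in kernel/irq/handle.c above) so the threaded handler does the slow work. The my_* names and register offsets are hypothetical, not part of this patch.

#include <linux/interrupt.h>
#include <linux/io.h>

struct my_dev {
        void __iomem *regs;
};

#define MY_IRQ_STATUS   0x00            /* hypothetical register offsets */
#define MY_IRQ_MASK     0x04

/* Primary handler: hard irq context, must not sleep. */
static irqreturn_t my_primary(int irq, void *dev_id)
{
        struct my_dev *dev = dev_id;

        if (!readl(dev->regs + MY_IRQ_STATUS))
                return IRQ_NONE;        /* shared line, not our interrupt */

        writel(0, dev->regs + MY_IRQ_MASK);     /* quiesce the device */
        return IRQ_WAKE_THREAD;                 /* run my_thread_fn() */
}

/* Threaded handler: process context, may sleep, take mutexes, do I/O. */
static irqreturn_t my_thread_fn(int irq, void *dev_id)
{
        struct my_dev *dev = dev_id;

        /* ... slow work goes here ... */
        writel(0xffffffff, dev->regs + MY_IRQ_MASK);    /* re-enable device irqs */
        return IRQ_HANDLED;
}

static int my_setup_irq(struct my_dev *dev, unsigned int irq)
{
        return request_threaded_irq(irq, my_primary, my_thread_fn,
                                    IRQF_SHARED, "my_dev", dev);
}
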
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 5016bfb682b9..a5e74ddee0e2 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE];
68static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; 68static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE];
69 69
70/* NOTE: change this value only with kprobe_mutex held */ 70/* NOTE: change this value only with kprobe_mutex held */
71static bool kprobe_enabled; 71static bool kprobes_all_disarmed;
72 72
73static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ 73static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */
74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; 74static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs)
328 struct kprobe *kp; 328 struct kprobe *kp;
329 329
330 list_for_each_entry_rcu(kp, &p->list, list) { 330 list_for_each_entry_rcu(kp, &p->list, list) {
331 if (kp->pre_handler && !kprobe_gone(kp)) { 331 if (kp->pre_handler && likely(!kprobe_disabled(kp))) {
332 set_kprobe_instance(kp); 332 set_kprobe_instance(kp);
333 if (kp->pre_handler(kp, regs)) 333 if (kp->pre_handler(kp, regs))
334 return 1; 334 return 1;
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs,
344 struct kprobe *kp; 344 struct kprobe *kp;
345 345
346 list_for_each_entry_rcu(kp, &p->list, list) { 346 list_for_each_entry_rcu(kp, &p->list, list) {
347 if (kp->post_handler && !kprobe_gone(kp)) { 347 if (kp->post_handler && likely(!kprobe_disabled(kp))) {
348 set_kprobe_instance(kp); 348 set_kprobe_instance(kp);
349 kp->post_handler(kp, regs, flags); 349 kp->post_handler(kp, regs, flags);
350 reset_kprobe_instance(); 350 reset_kprobe_instance();
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p)
518} 518}
519 519
520/* 520/*
521* Add the new probe to old_p->list. Fail if this is the 521* Add the new probe to ap->list. Fail if this is the
522* second jprobe at the address - two jprobes can't coexist 522* second jprobe at the address - two jprobes can't coexist
523*/ 523*/
524static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) 524static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p)
525{ 525{
526 BUG_ON(kprobe_gone(ap) || kprobe_gone(p));
526 if (p->break_handler) { 527 if (p->break_handler) {
527 if (old_p->break_handler) 528 if (ap->break_handler)
528 return -EEXIST; 529 return -EEXIST;
529 list_add_tail_rcu(&p->list, &old_p->list); 530 list_add_tail_rcu(&p->list, &ap->list);
530 old_p->break_handler = aggr_break_handler; 531 ap->break_handler = aggr_break_handler;
531 } else 532 } else
532 list_add_rcu(&p->list, &old_p->list); 533 list_add_rcu(&p->list, &ap->list);
533 if (p->post_handler && !old_p->post_handler) 534 if (p->post_handler && !ap->post_handler)
534 old_p->post_handler = aggr_post_handler; 535 ap->post_handler = aggr_post_handler;
536
537 if (kprobe_disabled(ap) && !kprobe_disabled(p)) {
538 ap->flags &= ~KPROBE_FLAG_DISABLED;
539 if (!kprobes_all_disarmed)
540 /* Arm the breakpoint again. */
541 arch_arm_kprobe(ap);
542 }
535 return 0; 543 return 0;
536} 544}
537 545
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
544 copy_kprobe(p, ap); 552 copy_kprobe(p, ap);
545 flush_insn_slot(ap); 553 flush_insn_slot(ap);
546 ap->addr = p->addr; 554 ap->addr = p->addr;
555 ap->flags = p->flags;
547 ap->pre_handler = aggr_pre_handler; 556 ap->pre_handler = aggr_pre_handler;
548 ap->fault_handler = aggr_fault_handler; 557 ap->fault_handler = aggr_fault_handler;
549 /* We don't care the kprobe which has gone. */ 558 /* We don't care the kprobe which has gone. */
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p,
566 struct kprobe *p) 575 struct kprobe *p)
567{ 576{
568 int ret = 0; 577 int ret = 0;
569 struct kprobe *ap; 578 struct kprobe *ap = old_p;
570 579
571 if (kprobe_gone(old_p)) { 580 if (old_p->pre_handler != aggr_pre_handler) {
581 /* If old_p is not an aggr_probe, create new aggr_kprobe. */
582 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
583 if (!ap)
584 return -ENOMEM;
585 add_aggr_kprobe(ap, old_p);
586 }
587
588 if (kprobe_gone(ap)) {
572 /* 589 /*
573 * Attempting to insert new probe at the same location that 590 * Attempting to insert new probe at the same location that
574 * had a probe in the module vaddr area which already 591 * had a probe in the module vaddr area which already
575 * freed. So, the instruction slot has already been 592 * freed. So, the instruction slot has already been
576 * released. We need a new slot for the new probe. 593 * released. We need a new slot for the new probe.
577 */ 594 */
578 ret = arch_prepare_kprobe(old_p); 595 ret = arch_prepare_kprobe(ap);
579 if (ret) 596 if (ret)
597 /*
598 * Even if fail to allocate new slot, don't need to
599 * free aggr_probe. It will be used next time, or
600 * freed by unregister_kprobe.
601 */
580 return ret; 602 return ret;
581 } 603
582 if (old_p->pre_handler == aggr_pre_handler) {
583 copy_kprobe(old_p, p);
584 ret = add_new_kprobe(old_p, p);
585 ap = old_p;
586 } else {
587 ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL);
588 if (!ap) {
589 if (kprobe_gone(old_p))
590 arch_remove_kprobe(old_p);
591 return -ENOMEM;
592 }
593 add_aggr_kprobe(ap, old_p);
594 copy_kprobe(ap, p);
595 ret = add_new_kprobe(ap, p);
596 }
597 if (kprobe_gone(old_p)) {
598 /* 604 /*
599 * If the old_p has gone, its breakpoint has been disarmed. 605 * Clear gone flag to prevent allocating new slot again, and
600 * We have to arm it again after preparing real kprobes. 606 * set disabled flag because it is not armed yet.
601 */ 607 */
602 ap->flags &= ~KPROBE_FLAG_GONE; 608 ap->flags = (ap->flags & ~KPROBE_FLAG_GONE)
603 if (kprobe_enabled) 609 | KPROBE_FLAG_DISABLED;
604 arch_arm_kprobe(ap);
605 } 610 }
606 return ret; 611
612 copy_kprobe(ap, p);
613 return add_new_kprobe(ap, p);
614}
615
616/* Try to disable aggr_kprobe, and return 1 if succeeded.*/
617static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p)
618{
619 struct kprobe *kp;
620
621 list_for_each_entry_rcu(kp, &p->list, list) {
622 if (!kprobe_disabled(kp))
623 /*
624 * There is an active probe on the list.
625 * We can't disable aggr_kprobe.
626 */
627 return 0;
628 }
629 p->flags |= KPROBE_FLAG_DISABLED;
630 return 1;
607} 631}
608 632
609static int __kprobes in_kprobes_functions(unsigned long addr) 633static int __kprobes in_kprobes_functions(unsigned long addr)
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p)
664 return -EINVAL; 688 return -EINVAL;
665 } 689 }
666 690
667 p->flags = 0; 691 /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */
692 p->flags &= KPROBE_FLAG_DISABLED;
693
668 /* 694 /*
669 * Check if are we probing a module. 695 * Check if are we probing a module.
670 */ 696 */
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p)
709 hlist_add_head_rcu(&p->hlist, 735 hlist_add_head_rcu(&p->hlist,
710 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); 736 &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]);
711 737
712 if (kprobe_enabled) 738 if (!kprobes_all_disarmed && !kprobe_disabled(p))
713 arch_arm_kprobe(p); 739 arch_arm_kprobe(p);
714 740
715out_unlock_text: 741out_unlock_text:
@@ -722,26 +748,39 @@ out:
722 748
723 return ret; 749 return ret;
724} 750}
751EXPORT_SYMBOL_GPL(register_kprobe);
725 752
726/* 753/* Check passed kprobe is valid and return kprobe in kprobe_table. */
727 * Unregister a kprobe without a scheduler synchronization. 754static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p)
728 */
729static int __kprobes __unregister_kprobe_top(struct kprobe *p)
730{ 755{
731 struct kprobe *old_p, *list_p; 756 struct kprobe *old_p, *list_p;
732 757
733 old_p = get_kprobe(p->addr); 758 old_p = get_kprobe(p->addr);
734 if (unlikely(!old_p)) 759 if (unlikely(!old_p))
735 return -EINVAL; 760 return NULL;
736 761
737 if (p != old_p) { 762 if (p != old_p) {
738 list_for_each_entry_rcu(list_p, &old_p->list, list) 763 list_for_each_entry_rcu(list_p, &old_p->list, list)
739 if (list_p == p) 764 if (list_p == p)
740 /* kprobe p is a valid probe */ 765 /* kprobe p is a valid probe */
741 goto valid_p; 766 goto valid;
742 return -EINVAL; 767 return NULL;
743 } 768 }
744valid_p: 769valid:
770 return old_p;
771}
772
773/*
774 * Unregister a kprobe without a scheduler synchronization.
775 */
776static int __kprobes __unregister_kprobe_top(struct kprobe *p)
777{
778 struct kprobe *old_p, *list_p;
779
780 old_p = __get_valid_kprobe(p);
781 if (old_p == NULL)
782 return -EINVAL;
783
745 if (old_p == p || 784 if (old_p == p ||
746 (old_p->pre_handler == aggr_pre_handler && 785 (old_p->pre_handler == aggr_pre_handler &&
747 list_is_singular(&old_p->list))) { 786 list_is_singular(&old_p->list))) {
@@ -750,7 +789,7 @@ valid_p:
750 * enabled and not gone - otherwise, the breakpoint would 789 * enabled and not gone - otherwise, the breakpoint would
751 * already have been removed. We save on flushing icache. 790 * already have been removed. We save on flushing icache.
752 */ 791 */
753 if (kprobe_enabled && !kprobe_gone(old_p)) { 792 if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) {
754 mutex_lock(&text_mutex); 793 mutex_lock(&text_mutex);
755 arch_disarm_kprobe(p); 794 arch_disarm_kprobe(p);
756 mutex_unlock(&text_mutex); 795 mutex_unlock(&text_mutex);
@@ -768,6 +807,11 @@ valid_p:
768 } 807 }
769noclean: 808noclean:
770 list_del_rcu(&p->list); 809 list_del_rcu(&p->list);
810 if (!kprobe_disabled(old_p)) {
811 try_to_disable_aggr_kprobe(old_p);
812 if (!kprobes_all_disarmed && kprobe_disabled(old_p))
813 arch_disarm_kprobe(old_p);
814 }
771 } 815 }
772 return 0; 816 return 0;
773} 817}
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num)
803 } 847 }
804 return ret; 848 return ret;
805} 849}
850EXPORT_SYMBOL_GPL(register_kprobes);
806 851
807void __kprobes unregister_kprobe(struct kprobe *p) 852void __kprobes unregister_kprobe(struct kprobe *p)
808{ 853{
809 unregister_kprobes(&p, 1); 854 unregister_kprobes(&p, 1);
810} 855}
856EXPORT_SYMBOL_GPL(unregister_kprobe);
811 857
812void __kprobes unregister_kprobes(struct kprobe **kps, int num) 858void __kprobes unregister_kprobes(struct kprobe **kps, int num)
813{ 859{
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num)
826 if (kps[i]->addr) 872 if (kps[i]->addr)
827 __unregister_kprobe_bottom(kps[i]); 873 __unregister_kprobe_bottom(kps[i]);
828} 874}
875EXPORT_SYMBOL_GPL(unregister_kprobes);
829 876
830static struct notifier_block kprobe_exceptions_nb = { 877static struct notifier_block kprobe_exceptions_nb = {
831 .notifier_call = kprobe_exceptions_notify, 878 .notifier_call = kprobe_exceptions_notify,
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num)
865 } 912 }
866 return ret; 913 return ret;
867} 914}
915EXPORT_SYMBOL_GPL(register_jprobes);
868 916
869int __kprobes register_jprobe(struct jprobe *jp) 917int __kprobes register_jprobe(struct jprobe *jp)
870{ 918{
871 return register_jprobes(&jp, 1); 919 return register_jprobes(&jp, 1);
872} 920}
921EXPORT_SYMBOL_GPL(register_jprobe);
873 922
874void __kprobes unregister_jprobe(struct jprobe *jp) 923void __kprobes unregister_jprobe(struct jprobe *jp)
875{ 924{
876 unregister_jprobes(&jp, 1); 925 unregister_jprobes(&jp, 1);
877} 926}
927EXPORT_SYMBOL_GPL(unregister_jprobe);
878 928
879void __kprobes unregister_jprobes(struct jprobe **jps, int num) 929void __kprobes unregister_jprobes(struct jprobe **jps, int num)
880{ 930{
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num)
894 __unregister_kprobe_bottom(&jps[i]->kp); 944 __unregister_kprobe_bottom(&jps[i]->kp);
895 } 945 }
896} 946}
947EXPORT_SYMBOL_GPL(unregister_jprobes);
897 948
898#ifdef CONFIG_KRETPROBES 949#ifdef CONFIG_KRETPROBES
899/* 950/*
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
987 free_rp_inst(rp); 1038 free_rp_inst(rp);
988 return ret; 1039 return ret;
989} 1040}
1041EXPORT_SYMBOL_GPL(register_kretprobe);
990 1042
991int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1043int __kprobes register_kretprobes(struct kretprobe **rps, int num)
992{ 1044{
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1004 } 1056 }
1005 return ret; 1057 return ret;
1006} 1058}
1059EXPORT_SYMBOL_GPL(register_kretprobes);
1007 1060
1008void __kprobes unregister_kretprobe(struct kretprobe *rp) 1061void __kprobes unregister_kretprobe(struct kretprobe *rp)
1009{ 1062{
1010 unregister_kretprobes(&rp, 1); 1063 unregister_kretprobes(&rp, 1);
1011} 1064}
1065EXPORT_SYMBOL_GPL(unregister_kretprobe);
1012 1066
1013void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1067void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1014{ 1068{
@@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1030 } 1084 }
1031 } 1085 }
1032} 1086}
1087EXPORT_SYMBOL_GPL(unregister_kretprobes);
1033 1088
1034#else /* CONFIG_KRETPROBES */ 1089#else /* CONFIG_KRETPROBES */
1035int __kprobes register_kretprobe(struct kretprobe *rp) 1090int __kprobes register_kretprobe(struct kretprobe *rp)
1036{ 1091{
1037 return -ENOSYS; 1092 return -ENOSYS;
1038} 1093}
1094EXPORT_SYMBOL_GPL(register_kretprobe);
1039 1095
1040int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1096int __kprobes register_kretprobes(struct kretprobe **rps, int num)
1041{ 1097{
1042 return -ENOSYS; 1098 return -ENOSYS;
1043} 1099}
1100EXPORT_SYMBOL_GPL(register_kretprobes);
1101
1044void __kprobes unregister_kretprobe(struct kretprobe *rp) 1102void __kprobes unregister_kretprobe(struct kretprobe *rp)
1045{ 1103{
1046} 1104}
1105EXPORT_SYMBOL_GPL(unregister_kretprobe);
1047 1106
1048void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1107void __kprobes unregister_kretprobes(struct kretprobe **rps, int num)
1049{ 1108{
1050} 1109}
1110EXPORT_SYMBOL_GPL(unregister_kretprobes);
1051 1111
1052static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1112static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1053 struct pt_regs *regs) 1113 struct pt_regs *regs)
@@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p,
1061static void __kprobes kill_kprobe(struct kprobe *p) 1121static void __kprobes kill_kprobe(struct kprobe *p)
1062{ 1122{
1063 struct kprobe *kp; 1123 struct kprobe *kp;
1124
1064 p->flags |= KPROBE_FLAG_GONE; 1125 p->flags |= KPROBE_FLAG_GONE;
1065 if (p->pre_handler == aggr_pre_handler) { 1126 if (p->pre_handler == aggr_pre_handler) {
1066 /* 1127 /*
@@ -1173,8 +1234,8 @@ static int __init init_kprobes(void)
1173 } 1234 }
1174 } 1235 }
1175 1236
1176 /* By default, kprobes are enabled */ 1237 /* By default, kprobes are armed */
1177 kprobe_enabled = true; 1238 kprobes_all_disarmed = false;
1178 1239
1179 err = arch_init_kprobes(); 1240 err = arch_init_kprobes();
1180 if (!err) 1241 if (!err)
@@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p,
1202 else 1263 else
1203 kprobe_type = "k"; 1264 kprobe_type = "k";
1204 if (sym) 1265 if (sym)
1205 seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, 1266 seq_printf(pi, "%p %s %s+0x%x %s %s%s\n",
1206 sym, offset, (modname ? modname : " "), 1267 p->addr, kprobe_type, sym, offset,
1207 (kprobe_gone(p) ? "[GONE]" : "")); 1268 (modname ? modname : " "),
1269 (kprobe_gone(p) ? "[GONE]" : ""),
1270 ((kprobe_disabled(p) && !kprobe_gone(p)) ?
1271 "[DISABLED]" : ""));
1208 else 1272 else
1209 seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, 1273 seq_printf(pi, "%p %s %p %s%s\n",
1210 (kprobe_gone(p) ? "[GONE]" : "")); 1274 p->addr, kprobe_type, p->addr,
1275 (kprobe_gone(p) ? "[GONE]" : ""),
1276 ((kprobe_disabled(p) && !kprobe_gone(p)) ?
1277 "[DISABLED]" : ""));
1211} 1278}
1212 1279
1213static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 1280static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos)
@@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = {
1272 .release = seq_release, 1339 .release = seq_release,
1273}; 1340};
1274 1341
1275static void __kprobes enable_all_kprobes(void) 1342/* Disable one kprobe */
1343int __kprobes disable_kprobe(struct kprobe *kp)
1344{
1345 int ret = 0;
1346 struct kprobe *p;
1347
1348 mutex_lock(&kprobe_mutex);
1349
1350 /* Check whether specified probe is valid. */
1351 p = __get_valid_kprobe(kp);
1352 if (unlikely(p == NULL)) {
1353 ret = -EINVAL;
1354 goto out;
1355 }
1356
1357 /* If the probe is already disabled (or gone), just return */
1358 if (kprobe_disabled(kp))
1359 goto out;
1360
1361 kp->flags |= KPROBE_FLAG_DISABLED;
1362 if (p != kp)
1363 /* When kp != p, p is always enabled. */
1364 try_to_disable_aggr_kprobe(p);
1365
1366 if (!kprobes_all_disarmed && kprobe_disabled(p))
1367 arch_disarm_kprobe(p);
1368out:
1369 mutex_unlock(&kprobe_mutex);
1370 return ret;
1371}
1372EXPORT_SYMBOL_GPL(disable_kprobe);
1373
1374/* Enable one kprobe */
1375int __kprobes enable_kprobe(struct kprobe *kp)
1376{
1377 int ret = 0;
1378 struct kprobe *p;
1379
1380 mutex_lock(&kprobe_mutex);
1381
1382 /* Check whether specified probe is valid. */
1383 p = __get_valid_kprobe(kp);
1384 if (unlikely(p == NULL)) {
1385 ret = -EINVAL;
1386 goto out;
1387 }
1388
1389 if (kprobe_gone(kp)) {
1390 /* This kprobe has gone, we couldn't enable it. */
1391 ret = -EINVAL;
1392 goto out;
1393 }
1394
1395 if (!kprobes_all_disarmed && kprobe_disabled(p))
1396 arch_arm_kprobe(p);
1397
1398 p->flags &= ~KPROBE_FLAG_DISABLED;
1399 if (p != kp)
1400 kp->flags &= ~KPROBE_FLAG_DISABLED;
1401out:
1402 mutex_unlock(&kprobe_mutex);
1403 return ret;
1404}
1405EXPORT_SYMBOL_GPL(enable_kprobe);
1406
1407static void __kprobes arm_all_kprobes(void)
1276{ 1408{
1277 struct hlist_head *head; 1409 struct hlist_head *head;
1278 struct hlist_node *node; 1410 struct hlist_node *node;
@@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void)
1281 1413
1282 mutex_lock(&kprobe_mutex); 1414 mutex_lock(&kprobe_mutex);
1283 1415
1284 /* If kprobes are already enabled, just return */ 1416 /* If kprobes are armed, just return */
1285 if (kprobe_enabled) 1417 if (!kprobes_all_disarmed)
1286 goto already_enabled; 1418 goto already_enabled;
1287 1419
1288 mutex_lock(&text_mutex); 1420 mutex_lock(&text_mutex);
1289 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1421 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1290 head = &kprobe_table[i]; 1422 head = &kprobe_table[i];
1291 hlist_for_each_entry_rcu(p, node, head, hlist) 1423 hlist_for_each_entry_rcu(p, node, head, hlist)
1292 if (!kprobe_gone(p)) 1424 if (!kprobe_disabled(p))
1293 arch_arm_kprobe(p); 1425 arch_arm_kprobe(p);
1294 } 1426 }
1295 mutex_unlock(&text_mutex); 1427 mutex_unlock(&text_mutex);
1296 1428
1297 kprobe_enabled = true; 1429 kprobes_all_disarmed = false;
1298 printk(KERN_INFO "Kprobes globally enabled\n"); 1430 printk(KERN_INFO "Kprobes globally enabled\n");
1299 1431
1300already_enabled: 1432already_enabled:
@@ -1302,7 +1434,7 @@ already_enabled:
1302 return; 1434 return;
1303} 1435}
1304 1436
1305static void __kprobes disable_all_kprobes(void) 1437static void __kprobes disarm_all_kprobes(void)
1306{ 1438{
1307 struct hlist_head *head; 1439 struct hlist_head *head;
1308 struct hlist_node *node; 1440 struct hlist_node *node;
@@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void)
1311 1443
1312 mutex_lock(&kprobe_mutex); 1444 mutex_lock(&kprobe_mutex);
1313 1445
1314 /* If kprobes are already disabled, just return */ 1446 /* If kprobes are already disarmed, just return */
1315 if (!kprobe_enabled) 1447 if (kprobes_all_disarmed)
1316 goto already_disabled; 1448 goto already_disabled;
1317 1449
1318 kprobe_enabled = false; 1450 kprobes_all_disarmed = true;
1319 printk(KERN_INFO "Kprobes globally disabled\n"); 1451 printk(KERN_INFO "Kprobes globally disabled\n");
1320 mutex_lock(&text_mutex); 1452 mutex_lock(&text_mutex);
1321 for (i = 0; i < KPROBE_TABLE_SIZE; i++) { 1453 for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
1322 head = &kprobe_table[i]; 1454 head = &kprobe_table[i];
1323 hlist_for_each_entry_rcu(p, node, head, hlist) { 1455 hlist_for_each_entry_rcu(p, node, head, hlist) {
1324 if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) 1456 if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p))
1325 arch_disarm_kprobe(p); 1457 arch_disarm_kprobe(p);
1326 } 1458 }
1327 } 1459 }
@@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file,
1347{ 1479{
1348 char buf[3]; 1480 char buf[3];
1349 1481
1350 if (kprobe_enabled) 1482 if (!kprobes_all_disarmed)
1351 buf[0] = '1'; 1483 buf[0] = '1';
1352 else 1484 else
1353 buf[0] = '0'; 1485 buf[0] = '0';
@@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file,
1370 case 'y': 1502 case 'y':
1371 case 'Y': 1503 case 'Y':
1372 case '1': 1504 case '1':
1373 enable_all_kprobes(); 1505 arm_all_kprobes();
1374 break; 1506 break;
1375 case 'n': 1507 case 'n':
1376 case 'N': 1508 case 'N':
1377 case '0': 1509 case '0':
1378 disable_all_kprobes(); 1510 disarm_all_kprobes();
1379 break; 1511 break;
1380 } 1512 }
1381 1513
@@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init);
1418 1550
1419module_init(init_kprobes); 1551module_init(init_kprobes);
1420 1552
1421EXPORT_SYMBOL_GPL(register_kprobe); 1553/* defined in arch/.../kernel/kprobes.c */
1422EXPORT_SYMBOL_GPL(unregister_kprobe);
1423EXPORT_SYMBOL_GPL(register_kprobes);
1424EXPORT_SYMBOL_GPL(unregister_kprobes);
1425EXPORT_SYMBOL_GPL(register_jprobe);
1426EXPORT_SYMBOL_GPL(unregister_jprobe);
1427EXPORT_SYMBOL_GPL(register_jprobes);
1428EXPORT_SYMBOL_GPL(unregister_jprobes);
1429EXPORT_SYMBOL_GPL(jprobe_return); 1554EXPORT_SYMBOL_GPL(jprobe_return);
1430EXPORT_SYMBOL_GPL(register_kretprobe);
1431EXPORT_SYMBOL_GPL(unregister_kretprobe);
1432EXPORT_SYMBOL_GPL(register_kretprobes);
1433EXPORT_SYMBOL_GPL(unregister_kretprobes);
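
A hedged sketch of the new per-probe disable/enable interface added above: a probe can now be registered with KPROBE_FLAG_DISABLED (register_kprobe() keeps only that flag), so the breakpoint is not armed until enable_kprobe() is called. The handler and the probed symbol below are illustrative choices, not part of this patch.

#include <linux/kernel.h>
#include <linux/kprobes.h>

static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
        pr_info("kprobe hit at %p (%s)\n", p->addr, p->symbol_name);
        return 0;                       /* continue with the probed instruction */
}

static struct kprobe my_probe = {
        .symbol_name    = "do_fork",            /* illustrative target */
        .pre_handler    = my_pre_handler,
        /* New with this patch: start out registered but not armed. */
        .flags          = KPROBE_FLAG_DISABLED,
};

static int my_probe_setup(void)
{
        int ret = register_kprobe(&my_probe);   /* breakpoint not inserted yet */

        if (ret)
                return ret;

        ret = enable_kprobe(&my_probe);         /* arms via arch_arm_kprobe() */
        /* ... later ... */
        disable_kprobe(&my_probe);              /* disarm, keep it registered */
        return ret;
}
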
diff --git a/kernel/module.c b/kernel/module.c
index c268a771595c..05f014efa32c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod,
1952 if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) 1952 if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
1953 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; 1953 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
1954#endif 1954#endif
1955 /* Don't keep __versions around; it's just for loading. */
1956 if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0)
1957 sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
1958 } 1955 }
1959 1956
1960 modindex = find_sec(hdr, sechdrs, secstrings, 1957 modindex = find_sec(hdr, sechdrs, secstrings,
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 85d5a2455103..88796c330838 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -166,97 +166,11 @@ void softlockup_tick(void)
166} 166}
167 167
168/* 168/*
169 * Have a reasonable limit on the number of tasks checked:
170 */
171unsigned long __read_mostly sysctl_hung_task_check_count = 1024;
172
173/*
174 * Zero means infinite timeout - no checking done:
175 */
176unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480;
177
178unsigned long __read_mostly sysctl_hung_task_warnings = 10;
179
180/*
181 * Only do the hung-tasks check on one CPU:
182 */
183static int check_cpu __read_mostly = -1;
184
185static void check_hung_task(struct task_struct *t, unsigned long now)
186{
187 unsigned long switch_count = t->nvcsw + t->nivcsw;
188
189 if (t->flags & PF_FROZEN)
190 return;
191
192 if (switch_count != t->last_switch_count || !t->last_switch_timestamp) {
193 t->last_switch_count = switch_count;
194 t->last_switch_timestamp = now;
195 return;
196 }
197 if ((long)(now - t->last_switch_timestamp) <
198 sysctl_hung_task_timeout_secs)
199 return;
200 if (!sysctl_hung_task_warnings)
201 return;
202 sysctl_hung_task_warnings--;
203
204 /*
205 * Ok, the task did not get scheduled for more than 2 minutes,
206 * complain:
207 */
208 printk(KERN_ERR "INFO: task %s:%d blocked for more than "
209 "%ld seconds.\n", t->comm, t->pid,
210 sysctl_hung_task_timeout_secs);
211 printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
212 " disables this message.\n");
213 sched_show_task(t);
214 __debug_show_held_locks(t);
215
216 t->last_switch_timestamp = now;
217 touch_nmi_watchdog();
218
219 if (softlockup_panic)
220 panic("softlockup: blocked tasks");
221}
222
223/*
224 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
225 * a really long time (120 seconds). If that happens, print out
226 * a warning.
227 */
228static void check_hung_uninterruptible_tasks(int this_cpu)
229{
230 int max_count = sysctl_hung_task_check_count;
231 unsigned long now = get_timestamp(this_cpu);
232 struct task_struct *g, *t;
233
234 /*
235 * If the system crashed already then all bets are off,
236 * do not report extra hung tasks:
237 */
238 if (test_taint(TAINT_DIE) || did_panic)
239 return;
240
241 read_lock(&tasklist_lock);
242 do_each_thread(g, t) {
243 if (!--max_count)
244 goto unlock;
245 /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
246 if (t->state == TASK_UNINTERRUPTIBLE)
247 check_hung_task(t, now);
248 } while_each_thread(g, t);
249 unlock:
250 read_unlock(&tasklist_lock);
251}
252
253/*
254 * The watchdog thread - runs every second and touches the timestamp. 169 * The watchdog thread - runs every second and touches the timestamp.
255 */ 170 */
256static int watchdog(void *__bind_cpu) 171static int watchdog(void *__bind_cpu)
257{ 172{
258 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 173 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
259 int this_cpu = (long)__bind_cpu;
260 174
261 sched_setscheduler(current, SCHED_FIFO, &param); 175 sched_setscheduler(current, SCHED_FIFO, &param);
262 176
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu)
276 if (kthread_should_stop()) 190 if (kthread_should_stop())
277 break; 191 break;
278 192
279 if (this_cpu == check_cpu) {
280 if (sysctl_hung_task_timeout_secs)
281 check_hung_uninterruptible_tasks(this_cpu);
282 }
283
284 set_current_state(TASK_INTERRUPTIBLE); 193 set_current_state(TASK_INTERRUPTIBLE);
285 } 194 }
286 __set_current_state(TASK_RUNNING); 195 __set_current_state(TASK_RUNNING);
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
312 break; 221 break;
313 case CPU_ONLINE: 222 case CPU_ONLINE:
314 case CPU_ONLINE_FROZEN: 223 case CPU_ONLINE_FROZEN:
315 check_cpu = cpumask_any(cpu_online_mask);
316 wake_up_process(per_cpu(watchdog_task, hotcpu)); 224 wake_up_process(per_cpu(watchdog_task, hotcpu));
317 break; 225 break;
318#ifdef CONFIG_HOTPLUG_CPU 226#ifdef CONFIG_HOTPLUG_CPU
319 case CPU_DOWN_PREPARE:
320 case CPU_DOWN_PREPARE_FROZEN:
321 if (hotcpu == check_cpu) {
322 /* Pick any other online cpu. */
323 check_cpu = cpumask_any_but(cpu_online_mask, hotcpu);
324 }
325 break;
326
327 case CPU_UP_CANCELED: 227 case CPU_UP_CANCELED:
328 case CPU_UP_CANCELED_FROZEN: 228 case CPU_UP_CANCELED_FROZEN:
329 if (!per_cpu(watchdog_task, hotcpu)) 229 if (!per_cpu(watchdog_task, hotcpu))
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b125e3387568..4286b62b34a0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1;
101static int __maybe_unused two = 2; 101static int __maybe_unused two = 2;
102static unsigned long one_ul = 1; 102static unsigned long one_ul = 1;
103static int one_hundred = 100; 103static int one_hundred = 100;
104static int one_thousand = 1000;
104 105
105/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ 106/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
106static int maxolduid = 65535; 107static int maxolduid = 65535;
@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = {
813 .extra1 = &neg_one, 814 .extra1 = &neg_one,
814 .extra2 = &sixty, 815 .extra2 = &sixty,
815 }, 816 },
817#endif
818#ifdef CONFIG_DETECT_HUNG_TASK
819 {
820 .ctl_name = CTL_UNNUMBERED,
821 .procname = "hung_task_panic",
822 .data = &sysctl_hung_task_panic,
823 .maxlen = sizeof(int),
824 .mode = 0644,
825 .proc_handler = &proc_dointvec_minmax,
826 .strategy = &sysctl_intvec,
827 .extra1 = &zero,
828 .extra2 = &one,
829 },
816 { 830 {
817 .ctl_name = CTL_UNNUMBERED, 831 .ctl_name = CTL_UNNUMBERED,
818 .procname = "hung_task_check_count", 832 .procname = "hung_task_check_count",
@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = {
828 .data = &sysctl_hung_task_timeout_secs, 842 .data = &sysctl_hung_task_timeout_secs,
829 .maxlen = sizeof(unsigned long), 843 .maxlen = sizeof(unsigned long),
830 .mode = 0644, 844 .mode = 0644,
831 .proc_handler = &proc_doulongvec_minmax, 845 .proc_handler = &proc_dohung_task_timeout_secs,
832 .strategy = &sysctl_intvec, 846 .strategy = &sysctl_intvec,
833 }, 847 },
834 { 848 {
@@ -1027,6 +1041,28 @@ static struct ctl_table vm_table[] = {
1027 .proc_handler = &proc_dointvec, 1041 .proc_handler = &proc_dointvec,
1028 }, 1042 },
1029 { 1043 {
1044 .ctl_name = CTL_UNNUMBERED,
1045 .procname = "nr_pdflush_threads_min",
1046 .data = &nr_pdflush_threads_min,
1047 .maxlen = sizeof nr_pdflush_threads_min,
1048 .mode = 0644 /* read-write */,
1049 .proc_handler = &proc_dointvec_minmax,
1050 .strategy = &sysctl_intvec,
1051 .extra1 = &one,
1052 .extra2 = &nr_pdflush_threads_max,
1053 },
1054 {
1055 .ctl_name = CTL_UNNUMBERED,
1056 .procname = "nr_pdflush_threads_max",
1057 .data = &nr_pdflush_threads_max,
1058 .maxlen = sizeof nr_pdflush_threads_max,
1059 .mode = 0644 /* read-write */,
1060 .proc_handler = &proc_dointvec_minmax,
1061 .strategy = &sysctl_intvec,
1062 .extra1 = &nr_pdflush_threads_min,
1063 .extra2 = &one_thousand,
1064 },
1065 {
1030 .ctl_name = VM_SWAPPINESS, 1066 .ctl_name = VM_SWAPPINESS,
1031 .procname = "swappiness", 1067 .procname = "swappiness",
1032 .data = &vm_swappiness, 1068 .data = &vm_swappiness,
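Both sysctl.c hunks register new tunables: hung_task_panic lands in kern_table as an int clamped to 0..1 by proc_dointvec_minmax, hung_task_timeout_secs switches to the dedicated proc_dohung_task_timeout_secs handler (presumably so the detector picks up a new timeout without waiting out the old one), and vm_table gains nr_pdflush_threads_min/max, each bounded by the other through extra1/extra2. A small userspace sketch of reading the panic knob through procfs; the path follows the procname field, and writing requires root:

/* Hedged sketch: read /proc/sys/kernel/hung_task_panic from userspace.
 * The file only exists on kernels built with CONFIG_DETECT_HUNG_TASK. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/kernel/hung_task_panic", "r");
	int val;

	if (!f) {
		perror("hung_task_panic");
		return 1;
	}
	if (fscanf(f, "%d", &val) == 1)
		printf("hung_task_panic = %d\n", val);
	fclose(f);

	/* As root, `echo 1 > /proc/sys/kernel/hung_task_panic` enables
	 * panic-on-hang; proc_dointvec_minmax will not store values
	 * outside the extra1..extra2 window, i.e. outside 0..1 here. */
	return 0;
}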
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 947c5b3f90c4..b32ff446c3fb 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
327 char *msg; 327 char *msg;
328 struct blk_trace *bt; 328 struct blk_trace *bt;
329 329
330 if (count > BLK_TN_MAX_MSG) 330 if (count >= BLK_TN_MAX_MSG)
331 return -EINVAL; 331 return -EINVAL;
332 332
333 msg = kmalloc(count, GFP_KERNEL); 333 msg = kmalloc(count + 1, GFP_KERNEL);
334 if (msg == NULL) 334 if (msg == NULL)
335 return -ENOMEM; 335 return -ENOMEM;
336 336
@@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
339 return -EFAULT; 339 return -EFAULT;
340 } 340 }
341 341
342 msg[count] = '\0';
342 bt = filp->private_data; 343 bt = filp->private_data;
343 __trace_note_message(bt, "%s", msg); 344 __trace_note_message(bt, "%s", msg);
344 kfree(msg); 345 kfree(msg);
@@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
642 if (blk_pc_request(rq)) { 643 if (blk_pc_request(rq)) {
643 what |= BLK_TC_ACT(BLK_TC_PC); 644 what |= BLK_TC_ACT(BLK_TC_PC);
644 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, 645 __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors,
645 sizeof(rq->cmd), rq->cmd); 646 rq->cmd_len, rq->cmd);
646 } else { 647 } else {
647 what |= BLK_TC_ACT(BLK_TC_FS); 648 what |= BLK_TC_ACT(BLK_TC_FS);
648 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, 649 __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9,
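The blk_msg_write() fix above is about termination: the message copied from userspace was handed to a "%s" format without a trailing NUL, so the size check becomes >=, the allocation grows by one byte, and msg[count] = '\0' is set before use. The same pattern in a standalone sketch; MAX_MSG and copy_bounded() are invented stand-ins, not blktrace code:

/* Hedged sketch of the fix's pattern: never pass a user-sized buffer
 * to a "%s" consumer without NUL-terminating it first. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_MSG 128

static char *copy_bounded(const char *src, size_t count)
{
	char *msg;

	if (count >= MAX_MSG)		/* leave room for the terminator */
		return NULL;
	msg = malloc(count + 1);	/* one extra byte for '\0' */
	if (!msg)
		return NULL;
	memcpy(msg, src, count);
	msg[count] = '\0';		/* "%s" is now safe */
	return msg;
}

int main(void)
{
	char raw[5] = { 'h', 'e', 'l', 'l', 'o' };	/* no NUL */
	char *msg = copy_bounded(raw, sizeof(raw));

	if (msg) {
		printf("%s\n", msg);
		free(msg);
	}
	return 0;
}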
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a0174a40c563..9d28476a9851 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -30,6 +30,7 @@
30#include <linux/percpu.h> 30#include <linux/percpu.h>
31#include <linux/splice.h> 31#include <linux/splice.h>
32#include <linux/kdebug.h> 32#include <linux/kdebug.h>
33#include <linux/string.h>
33#include <linux/ctype.h> 34#include <linux/ctype.h>
34#include <linux/init.h> 35#include <linux/init.h>
35#include <linux/poll.h> 36#include <linux/poll.h>
@@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str)
147} 148}
148__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); 149__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
149 150
150long 151unsigned long long ns2usecs(cycle_t nsec)
151ns2usecs(cycle_t nsec)
152{ 152{
153 nsec += 500; 153 nsec += 500;
154 do_div(nsec, 1000); 154 do_div(nsec, 1000);
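ns2usecs() keeps its round-to-nearest division (add 500, divide by 1000) but now returns the full 64-bit quotient; the old long return truncated large timestamps on 32-bit builds, where a long wraps after roughly an hour's worth of microseconds. A userspace rendering of the same arithmetic, with u64 standing in for cycle_t and plain division for do_div():

/* Hedged userspace rendering of the widened helper. */
#include <stdio.h>
#include <stdint.h>

static unsigned long long ns2usecs(uint64_t nsec)
{
	nsec += 500;		/* round half up */
	return nsec / 1000;	/* do_div(nsec, 1000) in the kernel */
}

int main(void)
{
	uint64_t two_hours_ns = 2ULL * 3600 * 1000000000ULL;

	/* 7.2e9 us does not fit in 32 bits, hence the wider return type */
	printf("%llu us\n", ns2usecs(two_hours_ns));
	return 0;
}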
@@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter)
1632 return; 1632 return;
1633 1633
1634 cpumask_set_cpu(iter->cpu, iter->started); 1634 cpumask_set_cpu(iter->cpu, iter->started);
1635 trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); 1635
1636 /* Don't print started cpu buffer for the first entry of the trace */
1637 if (iter->idx > 1)
1638 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
1639 iter->cpu);
1636} 1640}
1637 1641
1638static enum print_line_t print_trace_fmt(struct trace_iterator *iter) 1642static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
@@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file)
1867 if (current_trace) 1871 if (current_trace)
1868 *iter->trace = *current_trace; 1872 *iter->trace = *current_trace;
1869 1873
1874 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL))
1875 goto fail;
1876
1877 cpumask_clear(iter->started);
1878
1870 if (current_trace && current_trace->print_max) 1879 if (current_trace && current_trace->print_max)
1871 iter->tr = &max_tr; 1880 iter->tr = &max_tr;
1872 else 1881 else
@@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file)
1917 if (iter->buffer_iter[cpu]) 1926 if (iter->buffer_iter[cpu])
1918 ring_buffer_read_finish(iter->buffer_iter[cpu]); 1927 ring_buffer_read_finish(iter->buffer_iter[cpu]);
1919 } 1928 }
1929 free_cpumask_var(iter->started);
1920 fail: 1930 fail:
1921 mutex_unlock(&trace_types_lock); 1931 mutex_unlock(&trace_types_lock);
1922 kfree(iter->trace); 1932 kfree(iter->trace);
@@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file)
1960 1970
1961 seq_release(inode, file); 1971 seq_release(inode, file);
1962 mutex_destroy(&iter->mutex); 1972 mutex_destroy(&iter->mutex);
1973 free_cpumask_var(iter->started);
1963 kfree(iter->trace); 1974 kfree(iter->trace);
1964 kfree(iter); 1975 kfree(iter);
1965 return 0; 1976 return 0;
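The __tracing_open()/tracing_release() hunks turn iter->started into a properly managed cpumask_var_t: allocated with alloc_cpumask_var(), cleared before use, and released with free_cpumask_var() on both the open error path and in release, so nothing leaks when CONFIG_CPUMASK_OFFSTACK makes the mask a real allocation. A bare sketch of that lifecycle, using invented my_open()/my_release() helpers rather than the tracer's own functions:

/* Hedged kernel-side sketch of the cpumask_var_t lifecycle. */
#include <linux/cpumask.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct my_iter {
	cpumask_var_t started;
};

static int my_open(struct my_iter *it)
{
	/* Real storage only with CONFIG_CPUMASK_OFFSTACK; otherwise this
	 * is a no-op that always succeeds. */
	if (!alloc_cpumask_var(&it->started, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(it->started);
	return 0;
}

static void my_release(struct my_iter *it)
{
	free_cpumask_var(it->started);	/* must pair with every alloc */
}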
@@ -2358,9 +2369,9 @@ static const char readme_msg[] =
2358 "# mkdir /debug\n" 2369 "# mkdir /debug\n"
2359 "# mount -t debugfs nodev /debug\n\n" 2370 "# mount -t debugfs nodev /debug\n\n"
2360 "# cat /debug/tracing/available_tracers\n" 2371 "# cat /debug/tracing/available_tracers\n"
2361 "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" 2372 "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n"
2362 "# cat /debug/tracing/current_tracer\n" 2373 "# cat /debug/tracing/current_tracer\n"
2363 "none\n" 2374 "nop\n"
2364 "# echo sched_switch > /debug/tracing/current_tracer\n" 2375 "# echo sched_switch > /debug/tracing/current_tracer\n"
2365 "# cat /debug/tracing/current_tracer\n" 2376 "# cat /debug/tracing/current_tracer\n"
2366 "sched_switch\n" 2377 "sched_switch\n"
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cbc168f1e43d..e685ac2b2ba1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
602#endif /* CONFIG_FTRACE_STARTUP_TEST */ 602#endif /* CONFIG_FTRACE_STARTUP_TEST */
603 603
604extern void *head_page(struct trace_array_cpu *data); 604extern void *head_page(struct trace_array_cpu *data);
605extern long ns2usecs(cycle_t nsec); 605extern unsigned long long ns2usecs(cycle_t nsec);
606extern int 606extern int
607trace_vbprintk(unsigned long ip, const char *fmt, va_list args); 607trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
608extern int 608extern int
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 4d9952d3df50..07a22c33ebf3 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -40,7 +40,7 @@
40 40
41#undef TRACE_FIELD_ZERO_CHAR 41#undef TRACE_FIELD_ZERO_CHAR
42#define TRACE_FIELD_ZERO_CHAR(item) \ 42#define TRACE_FIELD_ZERO_CHAR(item) \
43 ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ 43 ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \
44 "offset:%u;\tsize:0;\n", \ 44 "offset:%u;\tsize:0;\n", \
45 (unsigned int)offsetof(typeof(field), item)); \ 45 (unsigned int)offsetof(typeof(field), item)); \
46 if (!ret) \ 46 if (!ret) \
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d72b9a63b247..64b54a59c55b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
423 423
424 trace_find_cmdline(entry->pid, comm); 424 trace_find_cmdline(entry->pid, comm);
425 425
426 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" 426 ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
427 " %ld.%03ldms (+%ld.%03ldms): ", comm, 427 " %ld.%03ldms (+%ld.%03ldms): ", comm,
428 entry->pid, iter->cpu, entry->flags, 428 entry->pid, iter->cpu, entry->flags,
429 entry->preempt_count, iter->idx, 429 entry->preempt_count, iter->idx,
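The trace_print_lat_context() change swaps %08lx for %08llx in the bracketed field, which tracks the ns2usecs() widening: once the value is unsigned long long, the conversion has to consume all 64 bits, otherwise on 32-bit targets every vararg after it is read from the wrong slot. In plain C:

/* Hedged illustration of why the width must match the argument. */
#include <stdio.h>

int main(void)
{
	unsigned long long t = 0x123456789abcULL;

	printf("[%08llx]\n", t);	/* consumes the full 64-bit value */
	/* printf("[%08lx]\n", t);	undefined behaviour where long is
	 *				32 bits; gcc -Wformat flags it    */
	return 0;
}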
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
index de35f200abd3..9117cea6f1ae 100644
--- a/kernel/trace/trace_sched_switch.c
+++ b/kernel/trace/trace_sched_switch.c
@@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
62 pc = preempt_count(); 62 pc = preempt_count();
63 tracing_record_cmdline(current); 63 tracing_record_cmdline(current);
64 64
65 if (sched_stopped)
66 return;
67
65 local_irq_save(flags); 68 local_irq_save(flags);
66 cpu = raw_smp_processor_id(); 69 cpu = raw_smp_processor_id();
67 data = ctx_trace->data[cpu]; 70 data = ctx_trace->data[cpu];
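probe_sched_wakeup() now returns early when sched_stopped is set, before the irq-save and per-CPU buffer work; the cmdline is still recorded, but no event is written while tracing is stopped. The gate-before-work shape in a trivial standalone sketch, with invented names:

/* Hedged sketch; tracing_stopped and record_event() are made up,
 * only the shape mirrors the sched_stopped test above. */
#include <stdio.h>

static int tracing_stopped;		/* plain flag, like sched_stopped */

static void record_event(int cpu)
{
	if (tracing_stopped)		/* cheap test before the real work */
		return;
	printf("event on cpu %d\n", cpu);	/* stands in for buffer writes */
}

int main(void)
{
	record_event(0);		/* recorded */
	tracing_stopped = 1;
	record_event(1);		/* silently dropped */
	return 0;
}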
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 3c5ad6b2ec84..5bc00e8f153e 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
154 if (unlikely(!tracer_enabled || next != wakeup_task)) 154 if (unlikely(!tracer_enabled || next != wakeup_task))
155 goto out_unlock; 155 goto out_unlock;
156 156
157 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 157 trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); 158 tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
159 159
160 /* 160 /*
@@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success)
257 data = wakeup_trace->data[wakeup_cpu]; 257 data = wakeup_trace->data[wakeup_cpu];
258 data->preempt_timestamp = ftrace_now(cpu); 258 data->preempt_timestamp = ftrace_now(cpu);
259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); 259 tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc);
260
261 /*
262 * We must be careful in using CALLER_ADDR2. But since wake_up
263 * is not called by an assembly function (where as schedule is)
264 * it should be safe to use it here.
265 */
260 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); 266 trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc);
261 267
262out_locked: 268out_locked:
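The wakeup tracer hunks adjust which return addresses get logged: the switch probe records CALLER_ADDR0/CALLER_ADDR1 instead of 1/2, and the new comment explains why going as deep as CALLER_ADDR2 is still considered safe from probe_wakeup(). With frame pointers those macros reduce to GCC's __builtin_return_address(); a userspace sketch of level 0, the only level that is always safe to take:

/* Hedged userspace sketch; probe() and wake_up_path() are invented.
 * Level 0 is the immediate caller; deeper levels need frame pointers
 * and can walk off into garbage, which is the caution above. */
#include <stdio.h>

static void __attribute__((noinline)) probe(void)
{
	printf("called from %p\n", __builtin_return_address(0));
}

static void __attribute__((noinline)) wake_up_path(void)
{
	probe();	/* probe() sees wake_up_path() as its level-0 caller */
}

int main(void)
{
	wake_up_path();
	return 0;
}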