diff options
| author | Ingo Molnar <mingo@elte.hu> | 2009-04-08 04:35:30 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-04-08 04:35:30 -0400 |
| commit | 5ea472a77f8e4811ceee3f44a9deda6ad6e8b789 (patch) | |
| tree | a9ec5019e2b666a19874fc344ffb0dd5da6bce94 /kernel | |
| parent | 6c009ecef8cca28c7c09eb16d0802e37915a76e1 (diff) | |
| parent | 577c9c456f0e1371cbade38eaf91ae8e8a308555 (diff) | |
Merge commit 'v2.6.30-rc1' into perfcounters/core
Conflicts:
arch/powerpc/include/asm/systbl.h
arch/powerpc/include/asm/unistd.h
include/linux/init_task.h
Merge reason: the conflicts are non-trivial: PowerPC placement
of sys_perf_counter_open has to be mixed with the
new preadv/pwrite syscalls.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/Makefile | 1 | ||||
| -rw-r--r-- | kernel/exit.c | 2 | ||||
| -rw-r--r-- | kernel/fork.c | 8 | ||||
| -rw-r--r-- | kernel/hung_task.c | 217 | ||||
| -rw-r--r-- | kernel/irq/devres.c | 16 | ||||
| -rw-r--r-- | kernel/irq/handle.c | 50 | ||||
| -rw-r--r-- | kernel/irq/manage.c | 189 | ||||
| -rw-r--r-- | kernel/kprobes.c | 281 | ||||
| -rw-r--r-- | kernel/module.c | 3 | ||||
| -rw-r--r-- | kernel/softlockup.c | 100 | ||||
| -rw-r--r-- | kernel/sysctl.c | 38 | ||||
| -rw-r--r-- | kernel/trace/blktrace.c | 7 | ||||
| -rw-r--r-- | kernel/trace/trace.c | 21 | ||||
| -rw-r--r-- | kernel/trace/trace.h | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_export.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_output.c | 2 | ||||
| -rw-r--r-- | kernel/trace/trace_sched_switch.c | 3 | ||||
| -rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 8 |
18 files changed, 729 insertions, 221 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 63c697529ca1..e914ca992d70 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
| @@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o | |||
| 74 | obj-$(CONFIG_KPROBES) += kprobes.o | 74 | obj-$(CONFIG_KPROBES) += kprobes.o |
| 75 | obj-$(CONFIG_KGDB) += kgdb.o | 75 | obj-$(CONFIG_KGDB) += kgdb.o |
| 76 | obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o | 76 | obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o |
| 77 | obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o | ||
| 77 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | 78 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ |
| 78 | obj-$(CONFIG_SECCOMP) += seccomp.o | 79 | obj-$(CONFIG_SECCOMP) += seccomp.o |
| 79 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 80 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
diff --git a/kernel/exit.c b/kernel/exit.c index fbb5d94c8bbc..4741376c8dec 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
| @@ -926,6 +926,8 @@ NORET_TYPE void do_exit(long code) | |||
| 926 | schedule(); | 926 | schedule(); |
| 927 | } | 927 | } |
| 928 | 928 | ||
| 929 | exit_irq_thread(); | ||
| 930 | |||
| 929 | exit_signals(tsk); /* sets PF_EXITING */ | 931 | exit_signals(tsk); /* sets PF_EXITING */ |
| 930 | /* | 932 | /* |
| 931 | * tsk->flags are checked in the futex code to protect against | 933 | * tsk->flags are checked in the futex code to protect against |
diff --git a/kernel/fork.c b/kernel/fork.c index 381d7f9b70fb..89c1efb3ccf4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
| @@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
| 645 | 645 | ||
| 646 | tsk->min_flt = tsk->maj_flt = 0; | 646 | tsk->min_flt = tsk->maj_flt = 0; |
| 647 | tsk->nvcsw = tsk->nivcsw = 0; | 647 | tsk->nvcsw = tsk->nivcsw = 0; |
| 648 | #ifdef CONFIG_DETECT_HUNG_TASK | ||
| 649 | tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; | ||
| 650 | #endif | ||
| 648 | 651 | ||
| 649 | tsk->mm = NULL; | 652 | tsk->mm = NULL; |
| 650 | tsk->active_mm = NULL; | 653 | tsk->active_mm = NULL; |
| @@ -1033,11 +1036,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
| 1033 | 1036 | ||
| 1034 | p->default_timer_slack_ns = current->timer_slack_ns; | 1037 | p->default_timer_slack_ns = current->timer_slack_ns; |
| 1035 | 1038 | ||
| 1036 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
| 1037 | p->last_switch_count = 0; | ||
| 1038 | p->last_switch_timestamp = 0; | ||
| 1039 | #endif | ||
| 1040 | |||
| 1041 | task_io_accounting_init(&p->ioac); | 1039 | task_io_accounting_init(&p->ioac); |
| 1042 | acct_clear_integrals(p); | 1040 | acct_clear_integrals(p); |
| 1043 | 1041 | ||
diff --git a/kernel/hung_task.c b/kernel/hung_task.c new file mode 100644 index 000000000000..022a4927b785 --- /dev/null +++ b/kernel/hung_task.c | |||
| @@ -0,0 +1,217 @@ | |||
| 1 | /* | ||
| 2 | * Detect Hung Task | ||
| 3 | * | ||
| 4 | * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state | ||
| 5 | * | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <linux/mm.h> | ||
| 9 | #include <linux/cpu.h> | ||
| 10 | #include <linux/nmi.h> | ||
| 11 | #include <linux/init.h> | ||
| 12 | #include <linux/delay.h> | ||
| 13 | #include <linux/freezer.h> | ||
| 14 | #include <linux/kthread.h> | ||
| 15 | #include <linux/lockdep.h> | ||
| 16 | #include <linux/module.h> | ||
| 17 | #include <linux/sysctl.h> | ||
| 18 | |||
| 19 | /* | ||
| 20 | * The number of tasks checked: | ||
| 21 | */ | ||
| 22 | unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; | ||
| 23 | |||
| 24 | /* | ||
| 25 | * Limit number of tasks checked in a batch. | ||
| 26 | * | ||
| 27 | * This value controls the preemptibility of khungtaskd since preemption | ||
| 28 | * is disabled during the critical section. It also controls the size of | ||
| 29 | * the RCU grace period. So it needs to be upper-bound. | ||
| 30 | */ | ||
| 31 | #define HUNG_TASK_BATCHING 1024 | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Zero means infinite timeout - no checking done: | ||
| 35 | */ | ||
| 36 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; | ||
| 37 | |||
| 38 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
| 39 | |||
| 40 | static int __read_mostly did_panic; | ||
| 41 | |||
| 42 | static struct task_struct *watchdog_task; | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Should we panic (and reboot, if panic_timeout= is set) when a | ||
| 46 | * hung task is detected: | ||
| 47 | */ | ||
| 48 | unsigned int __read_mostly sysctl_hung_task_panic = | ||
| 49 | CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; | ||
| 50 | |||
| 51 | static int __init hung_task_panic_setup(char *str) | ||
| 52 | { | ||
| 53 | sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); | ||
| 54 | |||
| 55 | return 1; | ||
| 56 | } | ||
| 57 | __setup("hung_task_panic=", hung_task_panic_setup); | ||
| 58 | |||
| 59 | static int | ||
| 60 | hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) | ||
| 61 | { | ||
| 62 | did_panic = 1; | ||
| 63 | |||
| 64 | return NOTIFY_DONE; | ||
| 65 | } | ||
| 66 | |||
| 67 | static struct notifier_block panic_block = { | ||
| 68 | .notifier_call = hung_task_panic, | ||
| 69 | }; | ||
| 70 | |||
| 71 | static void check_hung_task(struct task_struct *t, unsigned long timeout) | ||
| 72 | { | ||
| 73 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
| 74 | |||
| 75 | /* | ||
| 76 | * Ensure the task is not frozen. | ||
| 77 | * Also, when a freshly created task is scheduled once, changes | ||
| 78 | * its state to TASK_UNINTERRUPTIBLE without having ever been | ||
| 79 | * switched out once, it musn't be checked. | ||
| 80 | */ | ||
| 81 | if (unlikely(t->flags & PF_FROZEN || !switch_count)) | ||
| 82 | return; | ||
| 83 | |||
| 84 | if (switch_count != t->last_switch_count) { | ||
| 85 | t->last_switch_count = switch_count; | ||
| 86 | return; | ||
| 87 | } | ||
| 88 | if (!sysctl_hung_task_warnings) | ||
| 89 | return; | ||
| 90 | sysctl_hung_task_warnings--; | ||
| 91 | |||
| 92 | /* | ||
| 93 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
| 94 | * complain: | ||
| 95 | */ | ||
| 96 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
| 97 | "%ld seconds.\n", t->comm, t->pid, timeout); | ||
| 98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
| 99 | " disables this message.\n"); | ||
| 100 | sched_show_task(t); | ||
| 101 | __debug_show_held_locks(t); | ||
| 102 | |||
| 103 | touch_nmi_watchdog(); | ||
| 104 | |||
| 105 | if (sysctl_hung_task_panic) | ||
| 106 | panic("hung_task: blocked tasks"); | ||
| 107 | } | ||
| 108 | |||
| 109 | /* | ||
| 110 | * To avoid extending the RCU grace period for an unbounded amount of time, | ||
| 111 | * periodically exit the critical section and enter a new one. | ||
| 112 | * | ||
| 113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order | ||
| 114 | * exit the grace period. For classic RCU, a reschedule is required. | ||
| 115 | */ | ||
| 116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) | ||
| 117 | { | ||
| 118 | get_task_struct(g); | ||
| 119 | get_task_struct(t); | ||
| 120 | rcu_read_unlock(); | ||
| 121 | cond_resched(); | ||
| 122 | rcu_read_lock(); | ||
| 123 | put_task_struct(t); | ||
| 124 | put_task_struct(g); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
| 129 | * a really long time (120 seconds). If that happens, print out | ||
| 130 | * a warning. | ||
| 131 | */ | ||
| 132 | static void check_hung_uninterruptible_tasks(unsigned long timeout) | ||
| 133 | { | ||
| 134 | int max_count = sysctl_hung_task_check_count; | ||
| 135 | int batch_count = HUNG_TASK_BATCHING; | ||
| 136 | struct task_struct *g, *t; | ||
| 137 | |||
| 138 | /* | ||
| 139 | * If the system crashed already then all bets are off, | ||
| 140 | * do not report extra hung tasks: | ||
| 141 | */ | ||
| 142 | if (test_taint(TAINT_DIE) || did_panic) | ||
| 143 | return; | ||
| 144 | |||
| 145 | rcu_read_lock(); | ||
| 146 | do_each_thread(g, t) { | ||
| 147 | if (!--max_count) | ||
| 148 | goto unlock; | ||
| 149 | if (!--batch_count) { | ||
| 150 | batch_count = HUNG_TASK_BATCHING; | ||
| 151 | rcu_lock_break(g, t); | ||
| 152 | /* Exit if t or g was unhashed during refresh. */ | ||
| 153 | if (t->state == TASK_DEAD || g->state == TASK_DEAD) | ||
| 154 | goto unlock; | ||
| 155 | } | ||
| 156 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
| 157 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
| 158 | check_hung_task(t, timeout); | ||
| 159 | } while_each_thread(g, t); | ||
| 160 | unlock: | ||
| 161 | rcu_read_unlock(); | ||
| 162 | } | ||
| 163 | |||
| 164 | static unsigned long timeout_jiffies(unsigned long timeout) | ||
| 165 | { | ||
| 166 | /* timeout of 0 will disable the watchdog */ | ||
| 167 | return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT; | ||
| 168 | } | ||
| 169 | |||
| 170 | /* | ||
| 171 | * Process updating of timeout sysctl | ||
| 172 | */ | ||
| 173 | int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | ||
| 174 | struct file *filp, void __user *buffer, | ||
| 175 | size_t *lenp, loff_t *ppos) | ||
| 176 | { | ||
| 177 | int ret; | ||
| 178 | |||
| 179 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
| 180 | |||
| 181 | if (ret || !write) | ||
| 182 | goto out; | ||
| 183 | |||
| 184 | wake_up_process(watchdog_task); | ||
| 185 | |||
| 186 | out: | ||
| 187 | return ret; | ||
| 188 | } | ||
| 189 | |||
| 190 | /* | ||
| 191 | * kthread which checks for tasks stuck in D state | ||
| 192 | */ | ||
| 193 | static int watchdog(void *dummy) | ||
| 194 | { | ||
| 195 | set_user_nice(current, 0); | ||
| 196 | |||
| 197 | for ( ; ; ) { | ||
| 198 | unsigned long timeout = sysctl_hung_task_timeout_secs; | ||
| 199 | |||
| 200 | while (schedule_timeout_interruptible(timeout_jiffies(timeout))) | ||
| 201 | timeout = sysctl_hung_task_timeout_secs; | ||
| 202 | |||
| 203 | check_hung_uninterruptible_tasks(timeout); | ||
| 204 | } | ||
| 205 | |||
| 206 | return 0; | ||
| 207 | } | ||
| 208 | |||
| 209 | static int __init hung_task_init(void) | ||
| 210 | { | ||
| 211 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); | ||
| 212 | watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); | ||
| 213 | |||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | |||
| 217 | module_init(hung_task_init); | ||
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 38a25b8d8bff..d06df9c41cba 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c | |||
| @@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data) | |||
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | /** | 28 | /** |
| 29 | * devm_request_irq - allocate an interrupt line for a managed device | 29 | * devm_request_threaded_irq - allocate an interrupt line for a managed device |
| 30 | * @dev: device to request interrupt for | 30 | * @dev: device to request interrupt for |
| 31 | * @irq: Interrupt line to allocate | 31 | * @irq: Interrupt line to allocate |
| 32 | * @handler: Function to be called when the IRQ occurs | 32 | * @handler: Function to be called when the IRQ occurs |
| 33 | * @thread_fn: function to be called in a threaded interrupt context. NULL | ||
| 34 | * for devices which handle everything in @handler | ||
| 33 | * @irqflags: Interrupt type flags | 35 | * @irqflags: Interrupt type flags |
| 34 | * @devname: An ascii name for the claiming device | 36 | * @devname: An ascii name for the claiming device |
| 35 | * @dev_id: A cookie passed back to the handler function | 37 | * @dev_id: A cookie passed back to the handler function |
| @@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data) | |||
| 42 | * If an IRQ allocated with this function needs to be freed | 44 | * If an IRQ allocated with this function needs to be freed |
| 43 | * separately, dev_free_irq() must be used. | 45 | * separately, dev_free_irq() must be used. |
| 44 | */ | 46 | */ |
| 45 | int devm_request_irq(struct device *dev, unsigned int irq, | 47 | int devm_request_threaded_irq(struct device *dev, unsigned int irq, |
| 46 | irq_handler_t handler, unsigned long irqflags, | 48 | irq_handler_t handler, irq_handler_t thread_fn, |
| 47 | const char *devname, void *dev_id) | 49 | unsigned long irqflags, const char *devname, |
| 50 | void *dev_id) | ||
| 48 | { | 51 | { |
| 49 | struct irq_devres *dr; | 52 | struct irq_devres *dr; |
| 50 | int rc; | 53 | int rc; |
| @@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq, | |||
| 54 | if (!dr) | 57 | if (!dr) |
| 55 | return -ENOMEM; | 58 | return -ENOMEM; |
| 56 | 59 | ||
| 57 | rc = request_irq(irq, handler, irqflags, devname, dev_id); | 60 | rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, |
| 61 | dev_id); | ||
| 58 | if (rc) { | 62 | if (rc) { |
| 59 | devres_free(dr); | 63 | devres_free(dr); |
| 60 | return rc; | 64 | return rc; |
| @@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, | |||
| 66 | 70 | ||
| 67 | return 0; | 71 | return 0; |
| 68 | } | 72 | } |
| 69 | EXPORT_SYMBOL(devm_request_irq); | 73 | EXPORT_SYMBOL(devm_request_threaded_irq); |
| 70 | 74 | ||
| 71 | /** | 75 | /** |
| 72 | * devm_free_irq - free an interrupt | 76 | * devm_free_irq - free an interrupt |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 343acecae629..d82142be8dd2 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
| @@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id) | |||
| 339 | return IRQ_NONE; | 339 | return IRQ_NONE; |
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | static void warn_no_thread(unsigned int irq, struct irqaction *action) | ||
| 343 | { | ||
| 344 | if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) | ||
| 345 | return; | ||
| 346 | |||
| 347 | printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " | ||
| 348 | "but no thread function available.", irq, action->name); | ||
| 349 | } | ||
| 350 | |||
| 342 | DEFINE_TRACE(irq_handler_entry); | 351 | DEFINE_TRACE(irq_handler_entry); |
| 343 | DEFINE_TRACE(irq_handler_exit); | 352 | DEFINE_TRACE(irq_handler_exit); |
| 344 | 353 | ||
| @@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
| 363 | trace_irq_handler_entry(irq, action); | 372 | trace_irq_handler_entry(irq, action); |
| 364 | ret = action->handler(irq, action->dev_id); | 373 | ret = action->handler(irq, action->dev_id); |
| 365 | trace_irq_handler_exit(irq, action, ret); | 374 | trace_irq_handler_exit(irq, action, ret); |
| 366 | if (ret == IRQ_HANDLED) | 375 | |
| 376 | switch (ret) { | ||
| 377 | case IRQ_WAKE_THREAD: | ||
| 378 | /* | ||
| 379 | * Set result to handled so the spurious check | ||
| 380 | * does not trigger. | ||
| 381 | */ | ||
| 382 | ret = IRQ_HANDLED; | ||
| 383 | |||
| 384 | /* | ||
| 385 | * Catch drivers which return WAKE_THREAD but | ||
| 386 | * did not set up a thread function | ||
| 387 | */ | ||
| 388 | if (unlikely(!action->thread_fn)) { | ||
| 389 | warn_no_thread(irq, action); | ||
| 390 | break; | ||
| 391 | } | ||
| 392 | |||
| 393 | /* | ||
| 394 | * Wake up the handler thread for this | ||
| 395 | * action. In case the thread crashed and was | ||
| 396 | * killed we just pretend that we handled the | ||
| 397 | * interrupt. The hardirq handler above has | ||
| 398 | * disabled the device interrupt, so no irq | ||
| 399 | * storm is lurking. | ||
| 400 | */ | ||
| 401 | if (likely(!test_bit(IRQTF_DIED, | ||
| 402 | &action->thread_flags))) { | ||
| 403 | set_bit(IRQTF_RUNTHREAD, &action->thread_flags); | ||
| 404 | wake_up_process(action->thread); | ||
| 405 | } | ||
| 406 | |||
| 407 | /* Fall through to add to randomness */ | ||
| 408 | case IRQ_HANDLED: | ||
| 367 | status |= action->flags; | 409 | status |= action->flags; |
| 410 | break; | ||
| 411 | |||
| 412 | default: | ||
| 413 | break; | ||
| 414 | } | ||
| 415 | |||
| 368 | retval |= ret; | 416 | retval |= ret; |
| 369 | action = action->next; | 417 | action = action->next; |
| 370 | } while (action); | 418 | } while (action); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1516ab77355c..7e2e7dd4cd2f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
| @@ -8,16 +8,15 @@ | |||
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #include <linux/irq.h> | 10 | #include <linux/irq.h> |
| 11 | #include <linux/kthread.h> | ||
| 11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
| 12 | #include <linux/random.h> | 13 | #include <linux/random.h> |
| 13 | #include <linux/interrupt.h> | 14 | #include <linux/interrupt.h> |
| 14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | #include <linux/sched.h> | ||
| 15 | 17 | ||
| 16 | #include "internals.h" | 18 | #include "internals.h" |
| 17 | 19 | ||
| 18 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
| 19 | cpumask_var_t irq_default_affinity; | ||
| 20 | |||
| 21 | /** | 20 | /** |
| 22 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 21 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
| 23 | * @irq: interrupt number to wait for | 22 | * @irq: interrupt number to wait for |
| @@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq) | |||
| 53 | 52 | ||
| 54 | /* Oops, that failed? */ | 53 | /* Oops, that failed? */ |
| 55 | } while (status & IRQ_INPROGRESS); | 54 | } while (status & IRQ_INPROGRESS); |
| 55 | |||
| 56 | /* | ||
| 57 | * We made sure that no hardirq handler is running. Now verify | ||
| 58 | * that no threaded handlers are active. | ||
| 59 | */ | ||
| 60 | wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); | ||
| 56 | } | 61 | } |
| 57 | EXPORT_SYMBOL(synchronize_irq); | 62 | EXPORT_SYMBOL(synchronize_irq); |
| 58 | 63 | ||
| 64 | #ifdef CONFIG_SMP | ||
| 65 | cpumask_var_t irq_default_affinity; | ||
| 66 | |||
| 59 | /** | 67 | /** |
| 60 | * irq_can_set_affinity - Check if the affinity of a given irq can be set | 68 | * irq_can_set_affinity - Check if the affinity of a given irq can be set |
| 61 | * @irq: Interrupt to check | 69 | * @irq: Interrupt to check |
| @@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq) | |||
| 72 | return 1; | 80 | return 1; |
| 73 | } | 81 | } |
| 74 | 82 | ||
| 83 | static void | ||
| 84 | irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) | ||
| 85 | { | ||
| 86 | struct irqaction *action = desc->action; | ||
| 87 | |||
| 88 | while (action) { | ||
| 89 | if (action->thread) | ||
| 90 | set_cpus_allowed_ptr(action->thread, cpumask); | ||
| 91 | action = action->next; | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 75 | /** | 95 | /** |
| 76 | * irq_set_affinity - Set the irq affinity of a given irq | 96 | * irq_set_affinity - Set the irq affinity of a given irq |
| 77 | * @irq: Interrupt to set affinity | 97 | * @irq: Interrupt to set affinity |
| @@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | |||
| 100 | cpumask_copy(desc->affinity, cpumask); | 120 | cpumask_copy(desc->affinity, cpumask); |
| 101 | desc->chip->set_affinity(irq, cpumask); | 121 | desc->chip->set_affinity(irq, cpumask); |
| 102 | #endif | 122 | #endif |
| 123 | irq_set_thread_affinity(desc, cpumask); | ||
| 103 | desc->status |= IRQ_AFFINITY_SET; | 124 | desc->status |= IRQ_AFFINITY_SET; |
| 104 | spin_unlock_irqrestore(&desc->lock, flags); | 125 | spin_unlock_irqrestore(&desc->lock, flags); |
| 105 | return 0; | 126 | return 0; |
| @@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq) | |||
| 150 | 171 | ||
| 151 | spin_lock_irqsave(&desc->lock, flags); | 172 | spin_lock_irqsave(&desc->lock, flags); |
| 152 | ret = setup_affinity(irq, desc); | 173 | ret = setup_affinity(irq, desc); |
| 174 | if (!ret) | ||
| 175 | irq_set_thread_affinity(desc, desc->affinity); | ||
| 153 | spin_unlock_irqrestore(&desc->lock, flags); | 176 | spin_unlock_irqrestore(&desc->lock, flags); |
| 154 | 177 | ||
| 155 | return ret; | 178 | return ret; |
| @@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
| 401 | return ret; | 424 | return ret; |
| 402 | } | 425 | } |
| 403 | 426 | ||
| 427 | static int irq_wait_for_interrupt(struct irqaction *action) | ||
| 428 | { | ||
| 429 | while (!kthread_should_stop()) { | ||
| 430 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 431 | |||
| 432 | if (test_and_clear_bit(IRQTF_RUNTHREAD, | ||
| 433 | &action->thread_flags)) { | ||
| 434 | __set_current_state(TASK_RUNNING); | ||
| 435 | return 0; | ||
| 436 | } | ||
| 437 | schedule(); | ||
| 438 | } | ||
| 439 | return -1; | ||
| 440 | } | ||
| 441 | |||
| 442 | /* | ||
| 443 | * Interrupt handler thread | ||
| 444 | */ | ||
| 445 | static int irq_thread(void *data) | ||
| 446 | { | ||
| 447 | struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; | ||
| 448 | struct irqaction *action = data; | ||
| 449 | struct irq_desc *desc = irq_to_desc(action->irq); | ||
| 450 | int wake; | ||
| 451 | |||
| 452 | sched_setscheduler(current, SCHED_FIFO, ¶m); | ||
| 453 | current->irqaction = action; | ||
| 454 | |||
| 455 | while (!irq_wait_for_interrupt(action)) { | ||
| 456 | |||
| 457 | atomic_inc(&desc->threads_active); | ||
| 458 | |||
| 459 | spin_lock_irq(&desc->lock); | ||
| 460 | if (unlikely(desc->status & IRQ_DISABLED)) { | ||
| 461 | /* | ||
| 462 | * CHECKME: We might need a dedicated | ||
| 463 | * IRQ_THREAD_PENDING flag here, which | ||
| 464 | * retriggers the thread in check_irq_resend() | ||
| 465 | * but AFAICT IRQ_PENDING should be fine as it | ||
| 466 | * retriggers the interrupt itself --- tglx | ||
| 467 | */ | ||
| 468 | desc->status |= IRQ_PENDING; | ||
| 469 | spin_unlock_irq(&desc->lock); | ||
| 470 | } else { | ||
| 471 | spin_unlock_irq(&desc->lock); | ||
| 472 | |||
| 473 | action->thread_fn(action->irq, action->dev_id); | ||
| 474 | } | ||
| 475 | |||
| 476 | wake = atomic_dec_and_test(&desc->threads_active); | ||
| 477 | |||
| 478 | if (wake && waitqueue_active(&desc->wait_for_threads)) | ||
| 479 | wake_up(&desc->wait_for_threads); | ||
| 480 | } | ||
| 481 | |||
| 482 | /* | ||
| 483 | * Clear irqaction. Otherwise exit_irq_thread() would make | ||
| 484 | * fuzz about an active irq thread going into nirvana. | ||
| 485 | */ | ||
| 486 | current->irqaction = NULL; | ||
| 487 | return 0; | ||
| 488 | } | ||
| 489 | |||
| 490 | /* | ||
| 491 | * Called from do_exit() | ||
| 492 | */ | ||
| 493 | void exit_irq_thread(void) | ||
| 494 | { | ||
| 495 | struct task_struct *tsk = current; | ||
| 496 | |||
| 497 | if (!tsk->irqaction) | ||
| 498 | return; | ||
| 499 | |||
| 500 | printk(KERN_ERR | ||
| 501 | "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", | ||
| 502 | tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); | ||
| 503 | |||
| 504 | /* | ||
| 505 | * Set the THREAD DIED flag to prevent further wakeups of the | ||
| 506 | * soon to be gone threaded handler. | ||
| 507 | */ | ||
| 508 | set_bit(IRQTF_DIED, &tsk->irqaction->flags); | ||
| 509 | } | ||
| 510 | |||
| 404 | /* | 511 | /* |
| 405 | * Internal function to register an irqaction - typically used to | 512 | * Internal function to register an irqaction - typically used to |
| 406 | * allocate special interrupts that are part of the architecture. | 513 | * allocate special interrupts that are part of the architecture. |
| @@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 437 | } | 544 | } |
| 438 | 545 | ||
| 439 | /* | 546 | /* |
| 547 | * Threaded handler ? | ||
| 548 | */ | ||
| 549 | if (new->thread_fn) { | ||
| 550 | struct task_struct *t; | ||
| 551 | |||
| 552 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, | ||
| 553 | new->name); | ||
| 554 | if (IS_ERR(t)) | ||
| 555 | return PTR_ERR(t); | ||
| 556 | /* | ||
| 557 | * We keep the reference to the task struct even if | ||
| 558 | * the thread dies to avoid that the interrupt code | ||
| 559 | * references an already freed task_struct. | ||
| 560 | */ | ||
| 561 | get_task_struct(t); | ||
| 562 | new->thread = t; | ||
| 563 | wake_up_process(t); | ||
| 564 | } | ||
| 565 | |||
| 566 | /* | ||
| 440 | * The following block of code has to be executed atomically | 567 | * The following block of code has to be executed atomically |
| 441 | */ | 568 | */ |
| 442 | spin_lock_irqsave(&desc->lock, flags); | 569 | spin_lock_irqsave(&desc->lock, flags); |
| @@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
| 473 | if (!shared) { | 600 | if (!shared) { |
| 474 | irq_chip_set_defaults(desc->chip); | 601 | irq_chip_set_defaults(desc->chip); |
| 475 | 602 | ||
| 603 | init_waitqueue_head(&desc->wait_for_threads); | ||
| 604 | |||
| 476 | /* Setup the type (level, edge polarity) if configured: */ | 605 | /* Setup the type (level, edge polarity) if configured: */ |
| 477 | if (new->flags & IRQF_TRIGGER_MASK) { | 606 | if (new->flags & IRQF_TRIGGER_MASK) { |
| 478 | ret = __irq_set_trigger(desc, irq, | 607 | ret = __irq_set_trigger(desc, irq, |
| 479 | new->flags & IRQF_TRIGGER_MASK); | 608 | new->flags & IRQF_TRIGGER_MASK); |
| 480 | 609 | ||
| 481 | if (ret) { | 610 | if (ret) |
| 482 | spin_unlock_irqrestore(&desc->lock, flags); | 611 | goto out_thread; |
| 483 | return ret; | ||
| 484 | } | ||
| 485 | } else | 612 | } else |
| 486 | compat_irq_chip_set_default_handler(desc); | 613 | compat_irq_chip_set_default_handler(desc); |
| 487 | #if defined(CONFIG_IRQ_PER_CPU) | 614 | #if defined(CONFIG_IRQ_PER_CPU) |
| @@ -549,8 +676,19 @@ mismatch: | |||
| 549 | dump_stack(); | 676 | dump_stack(); |
| 550 | } | 677 | } |
| 551 | #endif | 678 | #endif |
| 679 | ret = -EBUSY; | ||
| 680 | |||
| 681 | out_thread: | ||
| 552 | spin_unlock_irqrestore(&desc->lock, flags); | 682 | spin_unlock_irqrestore(&desc->lock, flags); |
| 553 | return -EBUSY; | 683 | if (new->thread) { |
| 684 | struct task_struct *t = new->thread; | ||
| 685 | |||
| 686 | new->thread = NULL; | ||
| 687 | if (likely(!test_bit(IRQTF_DIED, &new->thread_flags))) | ||
| 688 | kthread_stop(t); | ||
| 689 | put_task_struct(t); | ||
| 690 | } | ||
| 691 | return ret; | ||
| 554 | } | 692 | } |
| 555 | 693 | ||
| 556 | /** | 694 | /** |
| @@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
| 576 | { | 714 | { |
| 577 | struct irq_desc *desc = irq_to_desc(irq); | 715 | struct irq_desc *desc = irq_to_desc(irq); |
| 578 | struct irqaction *action, **action_ptr; | 716 | struct irqaction *action, **action_ptr; |
| 717 | struct task_struct *irqthread; | ||
| 579 | unsigned long flags; | 718 | unsigned long flags; |
| 580 | 719 | ||
| 581 | WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); | 720 | WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); |
| @@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
| 622 | else | 761 | else |
| 623 | desc->chip->disable(irq); | 762 | desc->chip->disable(irq); |
| 624 | } | 763 | } |
| 764 | |||
| 765 | irqthread = action->thread; | ||
| 766 | action->thread = NULL; | ||
| 767 | |||
| 625 | spin_unlock_irqrestore(&desc->lock, flags); | 768 | spin_unlock_irqrestore(&desc->lock, flags); |
| 626 | 769 | ||
| 627 | unregister_handler_proc(irq, action); | 770 | unregister_handler_proc(irq, action); |
| @@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
| 629 | /* Make sure it's not being used on another CPU: */ | 772 | /* Make sure it's not being used on another CPU: */ |
| 630 | synchronize_irq(irq); | 773 | synchronize_irq(irq); |
| 631 | 774 | ||
| 775 | if (irqthread) { | ||
| 776 | if (!test_bit(IRQTF_DIED, &action->thread_flags)) | ||
| 777 | kthread_stop(irqthread); | ||
| 778 | put_task_struct(irqthread); | ||
| 779 | } | ||
| 780 | |||
| 632 | #ifdef CONFIG_DEBUG_SHIRQ | 781 | #ifdef CONFIG_DEBUG_SHIRQ |
| 633 | /* | 782 | /* |
| 634 | * It's a shared IRQ -- the driver ought to be prepared for an IRQ | 783 | * It's a shared IRQ -- the driver ought to be prepared for an IRQ |
| @@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id) | |||
| 681 | EXPORT_SYMBOL(free_irq); | 830 | EXPORT_SYMBOL(free_irq); |
| 682 | 831 | ||
| 683 | /** | 832 | /** |
| 684 | * request_irq - allocate an interrupt line | 833 | * request_threaded_irq - allocate an interrupt line |
| 685 | * @irq: Interrupt line to allocate | 834 | * @irq: Interrupt line to allocate |
| 686 | * @handler: Function to be called when the IRQ occurs | 835 | * @handler: Function to be called when the IRQ occurs. |
| 836 | * Primary handler for threaded interrupts | ||
| 837 | * @thread_fn: Function called from the irq handler thread | ||
| 838 | * If NULL, no irq thread is created | ||
| 687 | * @irqflags: Interrupt type flags | 839 | * @irqflags: Interrupt type flags |
| 688 | * @devname: An ascii name for the claiming device | 840 | * @devname: An ascii name for the claiming device |
| 689 | * @dev_id: A cookie passed back to the handler function | 841 | * @dev_id: A cookie passed back to the handler function |
| @@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq); | |||
| 695 | * raises, you must take care both to initialise your hardware | 847 | * raises, you must take care both to initialise your hardware |
| 696 | * and to set up the interrupt handler in the right order. | 848 | * and to set up the interrupt handler in the right order. |
| 697 | * | 849 | * |
| 850 | * If you want to set up a threaded irq handler for your device | ||
| 851 | * then you need to supply @handler and @thread_fn. @handler ist | ||
| 852 | * still called in hard interrupt context and has to check | ||
| 853 | * whether the interrupt originates from the device. If yes it | ||
| 854 | * needs to disable the interrupt on the device and return | ||
| 855 | * IRQ_THREAD_WAKE which will wake up the handler thread and run | ||
| 856 | * @thread_fn. This split handler design is necessary to support | ||
| 857 | * shared interrupts. | ||
| 858 | * | ||
| 698 | * Dev_id must be globally unique. Normally the address of the | 859 | * Dev_id must be globally unique. Normally the address of the |
| 699 | * device data structure is used as the cookie. Since the handler | 860 | * device data structure is used as the cookie. Since the handler |
| 700 | * receives this value it makes sense to use it. | 861 | * receives this value it makes sense to use it. |
| @@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq); | |||
| 710 | * IRQF_TRIGGER_* Specify active edge(s) or level | 871 | * IRQF_TRIGGER_* Specify active edge(s) or level |
| 711 | * | 872 | * |
| 712 | */ | 873 | */ |
| 713 | int request_irq(unsigned int irq, irq_handler_t handler, | 874 | int request_threaded_irq(unsigned int irq, irq_handler_t handler, |
| 714 | unsigned long irqflags, const char *devname, void *dev_id) | 875 | irq_handler_t thread_fn, unsigned long irqflags, |
| 876 | const char *devname, void *dev_id) | ||
| 715 | { | 877 | { |
| 716 | struct irqaction *action; | 878 | struct irqaction *action; |
| 717 | struct irq_desc *desc; | 879 | struct irq_desc *desc; |
| @@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
| 759 | return -ENOMEM; | 921 | return -ENOMEM; |
| 760 | 922 | ||
| 761 | action->handler = handler; | 923 | action->handler = handler; |
| 924 | action->thread_fn = thread_fn; | ||
| 762 | action->flags = irqflags; | 925 | action->flags = irqflags; |
| 763 | action->name = devname; | 926 | action->name = devname; |
| 764 | action->dev_id = dev_id; | 927 | action->dev_id = dev_id; |
| @@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
| 788 | #endif | 951 | #endif |
| 789 | return retval; | 952 | return retval; |
| 790 | } | 953 | } |
| 791 | EXPORT_SYMBOL(request_irq); | 954 | EXPORT_SYMBOL(request_threaded_irq); |
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5016bfb682b9..a5e74ddee0e2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
| @@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | |||
| 68 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 68 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
| 69 | 69 | ||
| 70 | /* NOTE: change this value only with kprobe_mutex held */ | 70 | /* NOTE: change this value only with kprobe_mutex held */ |
| 71 | static bool kprobe_enabled; | 71 | static bool kprobes_all_disarmed; |
| 72 | 72 | ||
| 73 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 73 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
| 74 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 74 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
| @@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
| 328 | struct kprobe *kp; | 328 | struct kprobe *kp; |
| 329 | 329 | ||
| 330 | list_for_each_entry_rcu(kp, &p->list, list) { | 330 | list_for_each_entry_rcu(kp, &p->list, list) { |
| 331 | if (kp->pre_handler && !kprobe_gone(kp)) { | 331 | if (kp->pre_handler && likely(!kprobe_disabled(kp))) { |
| 332 | set_kprobe_instance(kp); | 332 | set_kprobe_instance(kp); |
| 333 | if (kp->pre_handler(kp, regs)) | 333 | if (kp->pre_handler(kp, regs)) |
| 334 | return 1; | 334 | return 1; |
| @@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
| 344 | struct kprobe *kp; | 344 | struct kprobe *kp; |
| 345 | 345 | ||
| 346 | list_for_each_entry_rcu(kp, &p->list, list) { | 346 | list_for_each_entry_rcu(kp, &p->list, list) { |
| 347 | if (kp->post_handler && !kprobe_gone(kp)) { | 347 | if (kp->post_handler && likely(!kprobe_disabled(kp))) { |
| 348 | set_kprobe_instance(kp); | 348 | set_kprobe_instance(kp); |
| 349 | kp->post_handler(kp, regs, flags); | 349 | kp->post_handler(kp, regs, flags); |
| 350 | reset_kprobe_instance(); | 350 | reset_kprobe_instance(); |
| @@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
| 518 | } | 518 | } |
| 519 | 519 | ||
| 520 | /* | 520 | /* |
| 521 | * Add the new probe to old_p->list. Fail if this is the | 521 | * Add the new probe to ap->list. Fail if this is the |
| 522 | * second jprobe at the address - two jprobes can't coexist | 522 | * second jprobe at the address - two jprobes can't coexist |
| 523 | */ | 523 | */ |
| 524 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 524 | static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) |
| 525 | { | 525 | { |
| 526 | BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); | ||
| 526 | if (p->break_handler) { | 527 | if (p->break_handler) { |
| 527 | if (old_p->break_handler) | 528 | if (ap->break_handler) |
| 528 | return -EEXIST; | 529 | return -EEXIST; |
| 529 | list_add_tail_rcu(&p->list, &old_p->list); | 530 | list_add_tail_rcu(&p->list, &ap->list); |
| 530 | old_p->break_handler = aggr_break_handler; | 531 | ap->break_handler = aggr_break_handler; |
| 531 | } else | 532 | } else |
| 532 | list_add_rcu(&p->list, &old_p->list); | 533 | list_add_rcu(&p->list, &ap->list); |
| 533 | if (p->post_handler && !old_p->post_handler) | 534 | if (p->post_handler && !ap->post_handler) |
| 534 | old_p->post_handler = aggr_post_handler; | 535 | ap->post_handler = aggr_post_handler; |
| 536 | |||
| 537 | if (kprobe_disabled(ap) && !kprobe_disabled(p)) { | ||
| 538 | ap->flags &= ~KPROBE_FLAG_DISABLED; | ||
| 539 | if (!kprobes_all_disarmed) | ||
| 540 | /* Arm the breakpoint again. */ | ||
| 541 | arch_arm_kprobe(ap); | ||
| 542 | } | ||
| 535 | return 0; | 543 | return 0; |
| 536 | } | 544 | } |
| 537 | 545 | ||
| @@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
| 544 | copy_kprobe(p, ap); | 552 | copy_kprobe(p, ap); |
| 545 | flush_insn_slot(ap); | 553 | flush_insn_slot(ap); |
| 546 | ap->addr = p->addr; | 554 | ap->addr = p->addr; |
| 555 | ap->flags = p->flags; | ||
| 547 | ap->pre_handler = aggr_pre_handler; | 556 | ap->pre_handler = aggr_pre_handler; |
| 548 | ap->fault_handler = aggr_fault_handler; | 557 | ap->fault_handler = aggr_fault_handler; |
| 549 | /* We don't care the kprobe which has gone. */ | 558 | /* We don't care the kprobe which has gone. */ |
| @@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
| 566 | struct kprobe *p) | 575 | struct kprobe *p) |
| 567 | { | 576 | { |
| 568 | int ret = 0; | 577 | int ret = 0; |
| 569 | struct kprobe *ap; | 578 | struct kprobe *ap = old_p; |
| 570 | 579 | ||
| 571 | if (kprobe_gone(old_p)) { | 580 | if (old_p->pre_handler != aggr_pre_handler) { |
| 581 | /* If old_p is not an aggr_probe, create new aggr_kprobe. */ | ||
| 582 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); | ||
| 583 | if (!ap) | ||
| 584 | return -ENOMEM; | ||
| 585 | add_aggr_kprobe(ap, old_p); | ||
| 586 | } | ||
| 587 | |||
| 588 | if (kprobe_gone(ap)) { | ||
| 572 | /* | 589 | /* |
| 573 | * Attempting to insert new probe at the same location that | 590 | * Attempting to insert new probe at the same location that |
| 574 | * had a probe in the module vaddr area which already | 591 | * had a probe in the module vaddr area which already |
| 575 | * freed. So, the instruction slot has already been | 592 | * freed. So, the instruction slot has already been |
| 576 | * released. We need a new slot for the new probe. | 593 | * released. We need a new slot for the new probe. |
| 577 | */ | 594 | */ |
| 578 | ret = arch_prepare_kprobe(old_p); | 595 | ret = arch_prepare_kprobe(ap); |
| 579 | if (ret) | 596 | if (ret) |
| 597 | /* | ||
| 598 | * Even if fail to allocate new slot, don't need to | ||
| 599 | * free aggr_probe. It will be used next time, or | ||
| 600 | * freed by unregister_kprobe. | ||
| 601 | */ | ||
| 580 | return ret; | 602 | return ret; |
| 581 | } | 603 | |
| 582 | if (old_p->pre_handler == aggr_pre_handler) { | ||
| 583 | copy_kprobe(old_p, p); | ||
| 584 | ret = add_new_kprobe(old_p, p); | ||
| 585 | ap = old_p; | ||
| 586 | } else { | ||
| 587 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); | ||
| 588 | if (!ap) { | ||
| 589 | if (kprobe_gone(old_p)) | ||
| 590 | arch_remove_kprobe(old_p); | ||
| 591 | return -ENOMEM; | ||
| 592 | } | ||
| 593 | add_aggr_kprobe(ap, old_p); | ||
| 594 | copy_kprobe(ap, p); | ||
| 595 | ret = add_new_kprobe(ap, p); | ||
| 596 | } | ||
| 597 | if (kprobe_gone(old_p)) { | ||
| 598 | /* | 604 | /* |
| 599 | * If the old_p has gone, its breakpoint has been disarmed. | 605 | * Clear gone flag to prevent allocating new slot again, and |
| 600 | * We have to arm it again after preparing real kprobes. | 606 | * set disabled flag because it is not armed yet. |
| 601 | */ | 607 | */ |
| 602 | ap->flags &= ~KPROBE_FLAG_GONE; | 608 | ap->flags = (ap->flags & ~KPROBE_FLAG_GONE) |
| 603 | if (kprobe_enabled) | 609 | | KPROBE_FLAG_DISABLED; |
| 604 | arch_arm_kprobe(ap); | ||
| 605 | } | 610 | } |
| 606 | return ret; | 611 | |
| 612 | copy_kprobe(ap, p); | ||
| 613 | return add_new_kprobe(ap, p); | ||
| 614 | } | ||
| 615 | |||
| 616 | /* Try to disable aggr_kprobe, and return 1 if succeeded.*/ | ||
| 617 | static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) | ||
| 618 | { | ||
| 619 | struct kprobe *kp; | ||
| 620 | |||
| 621 | list_for_each_entry_rcu(kp, &p->list, list) { | ||
| 622 | if (!kprobe_disabled(kp)) | ||
| 623 | /* | ||
| 624 | * There is an active probe on the list. | ||
| 625 | * We can't disable aggr_kprobe. | ||
| 626 | */ | ||
| 627 | return 0; | ||
| 628 | } | ||
| 629 | p->flags |= KPROBE_FLAG_DISABLED; | ||
| 630 | return 1; | ||
| 607 | } | 631 | } |
| 608 | 632 | ||
| 609 | static int __kprobes in_kprobes_functions(unsigned long addr) | 633 | static int __kprobes in_kprobes_functions(unsigned long addr) |
| @@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 664 | return -EINVAL; | 688 | return -EINVAL; |
| 665 | } | 689 | } |
| 666 | 690 | ||
| 667 | p->flags = 0; | 691 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ |
| 692 | p->flags &= KPROBE_FLAG_DISABLED; | ||
| 693 | |||
| 668 | /* | 694 | /* |
| 669 | * Check if we are probing a module. | 695 | * Check if we are probing a module. |
| 670 | */ | 696 | */ |
| @@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
| 709 | hlist_add_head_rcu(&p->hlist, | 735 | hlist_add_head_rcu(&p->hlist, |
| 710 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 736 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
| 711 | 737 | ||
| 712 | if (kprobe_enabled) | 738 | if (!kprobes_all_disarmed && !kprobe_disabled(p)) |
| 713 | arch_arm_kprobe(p); | 739 | arch_arm_kprobe(p); |
| 714 | 740 | ||
| 715 | out_unlock_text: | 741 | out_unlock_text: |
| @@ -722,26 +748,39 @@ out: | |||
| 722 | 748 | ||
| 723 | return ret; | 749 | return ret; |
| 724 | } | 750 | } |
| 751 | EXPORT_SYMBOL_GPL(register_kprobe); | ||
| 725 | 752 | ||
| 726 | /* | 753 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ |
| 727 | * Unregister a kprobe without a scheduler synchronization. | 754 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) |
| 728 | */ | ||
| 729 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) | ||
| 730 | { | 755 | { |
| 731 | struct kprobe *old_p, *list_p; | 756 | struct kprobe *old_p, *list_p; |
| 732 | 757 | ||
| 733 | old_p = get_kprobe(p->addr); | 758 | old_p = get_kprobe(p->addr); |
| 734 | if (unlikely(!old_p)) | 759 | if (unlikely(!old_p)) |
| 735 | return -EINVAL; | 760 | return NULL; |
| 736 | 761 | ||
| 737 | if (p != old_p) { | 762 | if (p != old_p) { |
| 738 | list_for_each_entry_rcu(list_p, &old_p->list, list) | 763 | list_for_each_entry_rcu(list_p, &old_p->list, list) |
| 739 | if (list_p == p) | 764 | if (list_p == p) |
| 740 | /* kprobe p is a valid probe */ | 765 | /* kprobe p is a valid probe */ |
| 741 | goto valid_p; | 766 | goto valid; |
| 742 | return -EINVAL; | 767 | return NULL; |
| 743 | } | 768 | } |
| 744 | valid_p: | 769 | valid: |
| 770 | return old_p; | ||
| 771 | } | ||
| 772 | |||
| 773 | /* | ||
| 774 | * Unregister a kprobe without a scheduler synchronization. | ||
| 775 | */ | ||
| 776 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) | ||
| 777 | { | ||
| 778 | struct kprobe *old_p, *list_p; | ||
| 779 | |||
| 780 | old_p = __get_valid_kprobe(p); | ||
| 781 | if (old_p == NULL) | ||
| 782 | return -EINVAL; | ||
| 783 | |||
| 745 | if (old_p == p || | 784 | if (old_p == p || |
| 746 | (old_p->pre_handler == aggr_pre_handler && | 785 | (old_p->pre_handler == aggr_pre_handler && |
| 747 | list_is_singular(&old_p->list))) { | 786 | list_is_singular(&old_p->list))) { |
| @@ -750,7 +789,7 @@ valid_p: | |||
| 750 | * enabled and not gone - otherwise, the breakpoint would | 789 | * enabled and not gone - otherwise, the breakpoint would |
| 751 | * already have been removed. We save on flushing icache. | 790 | * already have been removed. We save on flushing icache. |
| 752 | */ | 791 | */ |
| 753 | if (kprobe_enabled && !kprobe_gone(old_p)) { | 792 | if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) { |
| 754 | mutex_lock(&text_mutex); | 793 | mutex_lock(&text_mutex); |
| 755 | arch_disarm_kprobe(p); | 794 | arch_disarm_kprobe(p); |
| 756 | mutex_unlock(&text_mutex); | 795 | mutex_unlock(&text_mutex); |
| @@ -768,6 +807,11 @@ valid_p: | |||
| 768 | } | 807 | } |
| 769 | noclean: | 808 | noclean: |
| 770 | list_del_rcu(&p->list); | 809 | list_del_rcu(&p->list); |
| 810 | if (!kprobe_disabled(old_p)) { | ||
| 811 | try_to_disable_aggr_kprobe(old_p); | ||
| 812 | if (!kprobes_all_disarmed && kprobe_disabled(old_p)) | ||
| 813 | arch_disarm_kprobe(old_p); | ||
| 814 | } | ||
| 771 | } | 815 | } |
| 772 | return 0; | 816 | return 0; |
| 773 | } | 817 | } |
| @@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num) | |||
| 803 | } | 847 | } |
| 804 | return ret; | 848 | return ret; |
| 805 | } | 849 | } |
| 850 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
| 806 | 851 | ||
| 807 | void __kprobes unregister_kprobe(struct kprobe *p) | 852 | void __kprobes unregister_kprobe(struct kprobe *p) |
| 808 | { | 853 | { |
| 809 | unregister_kprobes(&p, 1); | 854 | unregister_kprobes(&p, 1); |
| 810 | } | 855 | } |
| 856 | EXPORT_SYMBOL_GPL(unregister_kprobe); | ||
| 811 | 857 | ||
| 812 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) | 858 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) |
| 813 | { | 859 | { |
| @@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num) | |||
| 826 | if (kps[i]->addr) | 872 | if (kps[i]->addr) |
| 827 | __unregister_kprobe_bottom(kps[i]); | 873 | __unregister_kprobe_bottom(kps[i]); |
| 828 | } | 874 | } |
| 875 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
| 829 | 876 | ||
| 830 | static struct notifier_block kprobe_exceptions_nb = { | 877 | static struct notifier_block kprobe_exceptions_nb = { |
| 831 | .notifier_call = kprobe_exceptions_notify, | 878 | .notifier_call = kprobe_exceptions_notify, |
| @@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) | |||
| 865 | } | 912 | } |
| 866 | return ret; | 913 | return ret; |
| 867 | } | 914 | } |
| 915 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
| 868 | 916 | ||
| 869 | int __kprobes register_jprobe(struct jprobe *jp) | 917 | int __kprobes register_jprobe(struct jprobe *jp) |
| 870 | { | 918 | { |
| 871 | return register_jprobes(&jp, 1); | 919 | return register_jprobes(&jp, 1); |
| 872 | } | 920 | } |
| 921 | EXPORT_SYMBOL_GPL(register_jprobe); | ||
| 873 | 922 | ||
| 874 | void __kprobes unregister_jprobe(struct jprobe *jp) | 923 | void __kprobes unregister_jprobe(struct jprobe *jp) |
| 875 | { | 924 | { |
| 876 | unregister_jprobes(&jp, 1); | 925 | unregister_jprobes(&jp, 1); |
| 877 | } | 926 | } |
| 927 | EXPORT_SYMBOL_GPL(unregister_jprobe); | ||
| 878 | 928 | ||
| 879 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) | 929 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) |
| 880 | { | 930 | { |
| @@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num) | |||
| 894 | __unregister_kprobe_bottom(&jps[i]->kp); | 944 | __unregister_kprobe_bottom(&jps[i]->kp); |
| 895 | } | 945 | } |
| 896 | } | 946 | } |
| 947 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
| 897 | 948 | ||
| 898 | #ifdef CONFIG_KRETPROBES | 949 | #ifdef CONFIG_KRETPROBES |
| 899 | /* | 950 | /* |
| @@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
| 987 | free_rp_inst(rp); | 1038 | free_rp_inst(rp); |
| 988 | return ret; | 1039 | return ret; |
| 989 | } | 1040 | } |
| 1041 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
| 990 | 1042 | ||
| 991 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | 1043 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
| 992 | { | 1044 | { |
| @@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num) | |||
| 1004 | } | 1056 | } |
| 1005 | return ret; | 1057 | return ret; |
| 1006 | } | 1058 | } |
| 1059 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
| 1007 | 1060 | ||
| 1008 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1061 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
| 1009 | { | 1062 | { |
| 1010 | unregister_kretprobes(&rp, 1); | 1063 | unregister_kretprobes(&rp, 1); |
| 1011 | } | 1064 | } |
| 1065 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
| 1012 | 1066 | ||
| 1013 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1067 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
| 1014 | { | 1068 | { |
| @@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | |||
| 1030 | } | 1084 | } |
| 1031 | } | 1085 | } |
| 1032 | } | 1086 | } |
| 1087 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
| 1033 | 1088 | ||
| 1034 | #else /* CONFIG_KRETPROBES */ | 1089 | #else /* CONFIG_KRETPROBES */ |
| 1035 | int __kprobes register_kretprobe(struct kretprobe *rp) | 1090 | int __kprobes register_kretprobe(struct kretprobe *rp) |
| 1036 | { | 1091 | { |
| 1037 | return -ENOSYS; | 1092 | return -ENOSYS; |
| 1038 | } | 1093 | } |
| 1094 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
| 1039 | 1095 | ||
| 1040 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | 1096 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
| 1041 | { | 1097 | { |
| 1042 | return -ENOSYS; | 1098 | return -ENOSYS; |
| 1043 | } | 1099 | } |
| 1100 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
| 1101 | |||
| 1044 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1102 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
| 1045 | { | 1103 | { |
| 1046 | } | 1104 | } |
| 1105 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
| 1047 | 1106 | ||
| 1048 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1107 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
| 1049 | { | 1108 | { |
| 1050 | } | 1109 | } |
| 1110 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
| 1051 | 1111 | ||
| 1052 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, | 1112 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, |
| 1053 | struct pt_regs *regs) | 1113 | struct pt_regs *regs) |
| @@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
| 1061 | static void __kprobes kill_kprobe(struct kprobe *p) | 1121 | static void __kprobes kill_kprobe(struct kprobe *p) |
| 1062 | { | 1122 | { |
| 1063 | struct kprobe *kp; | 1123 | struct kprobe *kp; |
| 1124 | |||
| 1064 | p->flags |= KPROBE_FLAG_GONE; | 1125 | p->flags |= KPROBE_FLAG_GONE; |
| 1065 | if (p->pre_handler == aggr_pre_handler) { | 1126 | if (p->pre_handler == aggr_pre_handler) { |
| 1066 | /* | 1127 | /* |
| @@ -1173,8 +1234,8 @@ static int __init init_kprobes(void) | |||
| 1173 | } | 1234 | } |
| 1174 | } | 1235 | } |
| 1175 | 1236 | ||
| 1176 | /* By default, kprobes are enabled */ | 1237 | /* By default, kprobes are armed */ |
| 1177 | kprobe_enabled = true; | 1238 | kprobes_all_disarmed = false; |
| 1178 | 1239 | ||
| 1179 | err = arch_init_kprobes(); | 1240 | err = arch_init_kprobes(); |
| 1180 | if (!err) | 1241 | if (!err) |
| @@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, | |||
| 1202 | else | 1263 | else |
| 1203 | kprobe_type = "k"; | 1264 | kprobe_type = "k"; |
| 1204 | if (sym) | 1265 | if (sym) |
| 1205 | seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, | 1266 | seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", |
| 1206 | sym, offset, (modname ? modname : " "), | 1267 | p->addr, kprobe_type, sym, offset, |
| 1207 | (kprobe_gone(p) ? "[GONE]" : "")); | 1268 | (modname ? modname : " "), |
| 1269 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
| 1270 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
| 1271 | "[DISABLED]" : "")); | ||
| 1208 | else | 1272 | else |
| 1209 | seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, | 1273 | seq_printf(pi, "%p %s %p %s%s\n", |
| 1210 | (kprobe_gone(p) ? "[GONE]" : "")); | 1274 | p->addr, kprobe_type, p->addr, |
| 1275 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
| 1276 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
| 1277 | "[DISABLED]" : "")); | ||
| 1211 | } | 1278 | } |
| 1212 | 1279 | ||
| 1213 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) | 1280 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) |
| @@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = { | |||
| 1272 | .release = seq_release, | 1339 | .release = seq_release, |
| 1273 | }; | 1340 | }; |
| 1274 | 1341 | ||
| 1275 | static void __kprobes enable_all_kprobes(void) | 1342 | /* Disable one kprobe */ |
| 1343 | int __kprobes disable_kprobe(struct kprobe *kp) | ||
| 1344 | { | ||
| 1345 | int ret = 0; | ||
| 1346 | struct kprobe *p; | ||
| 1347 | |||
| 1348 | mutex_lock(&kprobe_mutex); | ||
| 1349 | |||
| 1350 | /* Check whether specified probe is valid. */ | ||
| 1351 | p = __get_valid_kprobe(kp); | ||
| 1352 | if (unlikely(p == NULL)) { | ||
| 1353 | ret = -EINVAL; | ||
| 1354 | goto out; | ||
| 1355 | } | ||
| 1356 | |||
| 1357 | /* If the probe is already disabled (or gone), just return */ | ||
| 1358 | if (kprobe_disabled(kp)) | ||
| 1359 | goto out; | ||
| 1360 | |||
| 1361 | kp->flags |= KPROBE_FLAG_DISABLED; | ||
| 1362 | if (p != kp) | ||
| 1363 | /* When kp != p, p is always enabled. */ | ||
| 1364 | try_to_disable_aggr_kprobe(p); | ||
| 1365 | |||
| 1366 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
| 1367 | arch_disarm_kprobe(p); | ||
| 1368 | out: | ||
| 1369 | mutex_unlock(&kprobe_mutex); | ||
| 1370 | return ret; | ||
| 1371 | } | ||
| 1372 | EXPORT_SYMBOL_GPL(disable_kprobe); | ||
| 1373 | |||
| 1374 | /* Enable one kprobe */ | ||
| 1375 | int __kprobes enable_kprobe(struct kprobe *kp) | ||
| 1376 | { | ||
| 1377 | int ret = 0; | ||
| 1378 | struct kprobe *p; | ||
| 1379 | |||
| 1380 | mutex_lock(&kprobe_mutex); | ||
| 1381 | |||
| 1382 | /* Check whether specified probe is valid. */ | ||
| 1383 | p = __get_valid_kprobe(kp); | ||
| 1384 | if (unlikely(p == NULL)) { | ||
| 1385 | ret = -EINVAL; | ||
| 1386 | goto out; | ||
| 1387 | } | ||
| 1388 | |||
| 1389 | if (kprobe_gone(kp)) { | ||
| 1390 | /* This kprobe has gone, we couldn't enable it. */ | ||
| 1391 | ret = -EINVAL; | ||
| 1392 | goto out; | ||
| 1393 | } | ||
| 1394 | |||
| 1395 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
| 1396 | arch_arm_kprobe(p); | ||
| 1397 | |||
| 1398 | p->flags &= ~KPROBE_FLAG_DISABLED; | ||
| 1399 | if (p != kp) | ||
| 1400 | kp->flags &= ~KPROBE_FLAG_DISABLED; | ||
| 1401 | out: | ||
| 1402 | mutex_unlock(&kprobe_mutex); | ||
| 1403 | return ret; | ||
| 1404 | } | ||
| 1405 | EXPORT_SYMBOL_GPL(enable_kprobe); | ||
| 1406 | |||
| 1407 | static void __kprobes arm_all_kprobes(void) | ||
| 1276 | { | 1408 | { |
| 1277 | struct hlist_head *head; | 1409 | struct hlist_head *head; |
| 1278 | struct hlist_node *node; | 1410 | struct hlist_node *node; |
| @@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void) | |||
| 1281 | 1413 | ||
| 1282 | mutex_lock(&kprobe_mutex); | 1414 | mutex_lock(&kprobe_mutex); |
| 1283 | 1415 | ||
| 1284 | /* If kprobes are already enabled, just return */ | 1416 | /* If kprobes are armed, just return */ |
| 1285 | if (kprobe_enabled) | 1417 | if (!kprobes_all_disarmed) |
| 1286 | goto already_enabled; | 1418 | goto already_enabled; |
| 1287 | 1419 | ||
| 1288 | mutex_lock(&text_mutex); | 1420 | mutex_lock(&text_mutex); |
| 1289 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1421 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| 1290 | head = &kprobe_table[i]; | 1422 | head = &kprobe_table[i]; |
| 1291 | hlist_for_each_entry_rcu(p, node, head, hlist) | 1423 | hlist_for_each_entry_rcu(p, node, head, hlist) |
| 1292 | if (!kprobe_gone(p)) | 1424 | if (!kprobe_disabled(p)) |
| 1293 | arch_arm_kprobe(p); | 1425 | arch_arm_kprobe(p); |
| 1294 | } | 1426 | } |
| 1295 | mutex_unlock(&text_mutex); | 1427 | mutex_unlock(&text_mutex); |
| 1296 | 1428 | ||
| 1297 | kprobe_enabled = true; | 1429 | kprobes_all_disarmed = false; |
| 1298 | printk(KERN_INFO "Kprobes globally enabled\n"); | 1430 | printk(KERN_INFO "Kprobes globally enabled\n"); |
| 1299 | 1431 | ||
| 1300 | already_enabled: | 1432 | already_enabled: |
| @@ -1302,7 +1434,7 @@ already_enabled: | |||
| 1302 | return; | 1434 | return; |
| 1303 | } | 1435 | } |
| 1304 | 1436 | ||
| 1305 | static void __kprobes disable_all_kprobes(void) | 1437 | static void __kprobes disarm_all_kprobes(void) |
| 1306 | { | 1438 | { |
| 1307 | struct hlist_head *head; | 1439 | struct hlist_head *head; |
| 1308 | struct hlist_node *node; | 1440 | struct hlist_node *node; |
| @@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void) | |||
| 1311 | 1443 | ||
| 1312 | mutex_lock(&kprobe_mutex); | 1444 | mutex_lock(&kprobe_mutex); |
| 1313 | 1445 | ||
| 1314 | /* If kprobes are already disabled, just return */ | 1446 | /* If kprobes are already disarmed, just return */ |
| 1315 | if (!kprobe_enabled) | 1447 | if (kprobes_all_disarmed) |
| 1316 | goto already_disabled; | 1448 | goto already_disabled; |
| 1317 | 1449 | ||
| 1318 | kprobe_enabled = false; | 1450 | kprobes_all_disarmed = true; |
| 1319 | printk(KERN_INFO "Kprobes globally disabled\n"); | 1451 | printk(KERN_INFO "Kprobes globally disabled\n"); |
| 1320 | mutex_lock(&text_mutex); | 1452 | mutex_lock(&text_mutex); |
| 1321 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1453 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
| 1322 | head = &kprobe_table[i]; | 1454 | head = &kprobe_table[i]; |
| 1323 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 1455 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
| 1324 | if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) | 1456 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) |
| 1325 | arch_disarm_kprobe(p); | 1457 | arch_disarm_kprobe(p); |
| 1326 | } | 1458 | } |
| 1327 | } | 1459 | } |
| @@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file, | |||
| 1347 | { | 1479 | { |
| 1348 | char buf[3]; | 1480 | char buf[3]; |
| 1349 | 1481 | ||
| 1350 | if (kprobe_enabled) | 1482 | if (!kprobes_all_disarmed) |
| 1351 | buf[0] = '1'; | 1483 | buf[0] = '1'; |
| 1352 | else | 1484 | else |
| 1353 | buf[0] = '0'; | 1485 | buf[0] = '0'; |
| @@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
| 1370 | case 'y': | 1502 | case 'y': |
| 1371 | case 'Y': | 1503 | case 'Y': |
| 1372 | case '1': | 1504 | case '1': |
| 1373 | enable_all_kprobes(); | 1505 | arm_all_kprobes(); |
| 1374 | break; | 1506 | break; |
| 1375 | case 'n': | 1507 | case 'n': |
| 1376 | case 'N': | 1508 | case 'N': |
| 1377 | case '0': | 1509 | case '0': |
| 1378 | disable_all_kprobes(); | 1510 | disarm_all_kprobes(); |
| 1379 | break; | 1511 | break; |
| 1380 | } | 1512 | } |
| 1381 | 1513 | ||
| @@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init); | |||
| 1418 | 1550 | ||
| 1419 | module_init(init_kprobes); | 1551 | module_init(init_kprobes); |
| 1420 | 1552 | ||
| 1421 | EXPORT_SYMBOL_GPL(register_kprobe); | 1553 | /* defined in arch/.../kernel/kprobes.c */ |
| 1422 | EXPORT_SYMBOL_GPL(unregister_kprobe); | ||
| 1423 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
| 1424 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
| 1425 | EXPORT_SYMBOL_GPL(register_jprobe); | ||
| 1426 | EXPORT_SYMBOL_GPL(unregister_jprobe); | ||
| 1427 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
| 1428 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
| 1429 | EXPORT_SYMBOL_GPL(jprobe_return); | 1554 | EXPORT_SYMBOL_GPL(jprobe_return); |
| 1430 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
| 1431 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
| 1432 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
| 1433 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
diff --git a/kernel/module.c b/kernel/module.c index c268a771595c..05f014efa32c 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
| @@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod, | |||
| 1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) | 1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) |
| 1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; |
| 1954 | #endif | 1954 | #endif |
| 1955 | /* Don't keep __versions around; it's just for loading. */ | ||
| 1956 | if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) | ||
| 1957 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
| 1958 | } | 1955 | } |
| 1959 | 1956 | ||
| 1960 | modindex = find_sec(hdr, sechdrs, secstrings, | 1957 | modindex = find_sec(hdr, sechdrs, secstrings, |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 85d5a2455103..88796c330838 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
| @@ -166,97 +166,11 @@ void softlockup_tick(void) | |||
| 166 | } | 166 | } |
| 167 | 167 | ||
| 168 | /* | 168 | /* |
| 169 | * Have a reasonable limit on the number of tasks checked: | ||
| 170 | */ | ||
| 171 | unsigned long __read_mostly sysctl_hung_task_check_count = 1024; | ||
| 172 | |||
| 173 | /* | ||
| 174 | * Zero means infinite timeout - no checking done: | ||
| 175 | */ | ||
| 176 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; | ||
| 177 | |||
| 178 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
| 179 | |||
| 180 | /* | ||
| 181 | * Only do the hung-tasks check on one CPU: | ||
| 182 | */ | ||
| 183 | static int check_cpu __read_mostly = -1; | ||
| 184 | |||
| 185 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
| 186 | { | ||
| 187 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
| 188 | |||
| 189 | if (t->flags & PF_FROZEN) | ||
| 190 | return; | ||
| 191 | |||
| 192 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
| 193 | t->last_switch_count = switch_count; | ||
| 194 | t->last_switch_timestamp = now; | ||
| 195 | return; | ||
| 196 | } | ||
| 197 | if ((long)(now - t->last_switch_timestamp) < | ||
| 198 | sysctl_hung_task_timeout_secs) | ||
| 199 | return; | ||
| 200 | if (!sysctl_hung_task_warnings) | ||
| 201 | return; | ||
| 202 | sysctl_hung_task_warnings--; | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
| 206 | * complain: | ||
| 207 | */ | ||
| 208 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
| 209 | "%ld seconds.\n", t->comm, t->pid, | ||
| 210 | sysctl_hung_task_timeout_secs); | ||
| 211 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
| 212 | " disables this message.\n"); | ||
| 213 | sched_show_task(t); | ||
| 214 | __debug_show_held_locks(t); | ||
| 215 | |||
| 216 | t->last_switch_timestamp = now; | ||
| 217 | touch_nmi_watchdog(); | ||
| 218 | |||
| 219 | if (softlockup_panic) | ||
| 220 | panic("softlockup: blocked tasks"); | ||
| 221 | } | ||
| 222 | |||
| 223 | /* | ||
| 224 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
| 225 | * a really long time (120 seconds). If that happens, print out | ||
| 226 | * a warning. | ||
| 227 | */ | ||
| 228 | static void check_hung_uninterruptible_tasks(int this_cpu) | ||
| 229 | { | ||
| 230 | int max_count = sysctl_hung_task_check_count; | ||
| 231 | unsigned long now = get_timestamp(this_cpu); | ||
| 232 | struct task_struct *g, *t; | ||
| 233 | |||
| 234 | /* | ||
| 235 | * If the system crashed already then all bets are off, | ||
| 236 | * do not report extra hung tasks: | ||
| 237 | */ | ||
| 238 | if (test_taint(TAINT_DIE) || did_panic) | ||
| 239 | return; | ||
| 240 | |||
| 241 | read_lock(&tasklist_lock); | ||
| 242 | do_each_thread(g, t) { | ||
| 243 | if (!--max_count) | ||
| 244 | goto unlock; | ||
| 245 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
| 246 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
| 247 | check_hung_task(t, now); | ||
| 248 | } while_each_thread(g, t); | ||
| 249 | unlock: | ||
| 250 | read_unlock(&tasklist_lock); | ||
| 251 | } | ||
| 252 | |||
| 253 | /* | ||
| 254 | * The watchdog thread - runs every second and touches the timestamp. | 169 | * The watchdog thread - runs every second and touches the timestamp. |
| 255 | */ | 170 | */ |
| 256 | static int watchdog(void *__bind_cpu) | 171 | static int watchdog(void *__bind_cpu) |
| 257 | { | 172 | { |
| 258 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 173 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
| 259 | int this_cpu = (long)__bind_cpu; | ||
| 260 | 174 | ||
| 261 | sched_setscheduler(current, SCHED_FIFO, ¶m); | 175 | sched_setscheduler(current, SCHED_FIFO, ¶m); |
| 262 | 176 | ||
| @@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) | |||
| 276 | if (kthread_should_stop()) | 190 | if (kthread_should_stop()) |
| 277 | break; | 191 | break; |
| 278 | 192 | ||
| 279 | if (this_cpu == check_cpu) { | ||
| 280 | if (sysctl_hung_task_timeout_secs) | ||
| 281 | check_hung_uninterruptible_tasks(this_cpu); | ||
| 282 | } | ||
| 283 | |||
| 284 | set_current_state(TASK_INTERRUPTIBLE); | 193 | set_current_state(TASK_INTERRUPTIBLE); |
| 285 | } | 194 | } |
| 286 | __set_current_state(TASK_RUNNING); | 195 | __set_current_state(TASK_RUNNING); |
| @@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 312 | break; | 221 | break; |
| 313 | case CPU_ONLINE: | 222 | case CPU_ONLINE: |
| 314 | case CPU_ONLINE_FROZEN: | 223 | case CPU_ONLINE_FROZEN: |
| 315 | check_cpu = cpumask_any(cpu_online_mask); | ||
| 316 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 224 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
| 317 | break; | 225 | break; |
| 318 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
| 319 | case CPU_DOWN_PREPARE: | ||
| 320 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 321 | if (hotcpu == check_cpu) { | ||
| 322 | /* Pick any other online cpu. */ | ||
| 323 | check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); | ||
| 324 | } | ||
| 325 | break; | ||
| 326 | |||
| 327 | case CPU_UP_CANCELED: | 227 | case CPU_UP_CANCELED: |
| 328 | case CPU_UP_CANCELED_FROZEN: | 228 | case CPU_UP_CANCELED_FROZEN: |
| 329 | if (!per_cpu(watchdog_task, hotcpu)) | 229 | if (!per_cpu(watchdog_task, hotcpu)) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b125e3387568..4286b62b34a0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -101,6 +101,7 @@ static int __maybe_unused one = 1; | |||
| 101 | static int __maybe_unused two = 2; | 101 | static int __maybe_unused two = 2; |
| 102 | static unsigned long one_ul = 1; | 102 | static unsigned long one_ul = 1; |
| 103 | static int one_hundred = 100; | 103 | static int one_hundred = 100; |
| 104 | static int one_thousand = 1000; | ||
| 104 | 105 | ||
| 105 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 106 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
| 106 | static int maxolduid = 65535; | 107 | static int maxolduid = 65535; |
| @@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = { | |||
| 813 | .extra1 = &neg_one, | 814 | .extra1 = &neg_one, |
| 814 | .extra2 = &sixty, | 815 | .extra2 = &sixty, |
| 815 | }, | 816 | }, |
| 817 | #endif | ||
| 818 | #ifdef CONFIG_DETECT_HUNG_TASK | ||
| 819 | { | ||
| 820 | .ctl_name = CTL_UNNUMBERED, | ||
| 821 | .procname = "hung_task_panic", | ||
| 822 | .data = &sysctl_hung_task_panic, | ||
| 823 | .maxlen = sizeof(int), | ||
| 824 | .mode = 0644, | ||
| 825 | .proc_handler = &proc_dointvec_minmax, | ||
| 826 | .strategy = &sysctl_intvec, | ||
| 827 | .extra1 = &zero, | ||
| 828 | .extra2 = &one, | ||
| 829 | }, | ||
| 816 | { | 830 | { |
| 817 | .ctl_name = CTL_UNNUMBERED, | 831 | .ctl_name = CTL_UNNUMBERED, |
| 818 | .procname = "hung_task_check_count", | 832 | .procname = "hung_task_check_count", |
| @@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = { | |||
| 828 | .data = &sysctl_hung_task_timeout_secs, | 842 | .data = &sysctl_hung_task_timeout_secs, |
| 829 | .maxlen = sizeof(unsigned long), | 843 | .maxlen = sizeof(unsigned long), |
| 830 | .mode = 0644, | 844 | .mode = 0644, |
| 831 | .proc_handler = &proc_doulongvec_minmax, | 845 | .proc_handler = &proc_dohung_task_timeout_secs, |
| 832 | .strategy = &sysctl_intvec, | 846 | .strategy = &sysctl_intvec, |
| 833 | }, | 847 | }, |
| 834 | { | 848 | { |
| @@ -1027,6 +1041,28 @@ static struct ctl_table vm_table[] = { | |||
| 1027 | .proc_handler = &proc_dointvec, | 1041 | .proc_handler = &proc_dointvec, |
| 1028 | }, | 1042 | }, |
| 1029 | { | 1043 | { |
| 1044 | .ctl_name = CTL_UNNUMBERED, | ||
| 1045 | .procname = "nr_pdflush_threads_min", | ||
| 1046 | .data = &nr_pdflush_threads_min, | ||
| 1047 | .maxlen = sizeof nr_pdflush_threads_min, | ||
| 1048 | .mode = 0644 /* read-write */, | ||
| 1049 | .proc_handler = &proc_dointvec_minmax, | ||
| 1050 | .strategy = &sysctl_intvec, | ||
| 1051 | .extra1 = &one, | ||
| 1052 | .extra2 = &nr_pdflush_threads_max, | ||
| 1053 | }, | ||
| 1054 | { | ||
| 1055 | .ctl_name = CTL_UNNUMBERED, | ||
| 1056 | .procname = "nr_pdflush_threads_max", | ||
| 1057 | .data = &nr_pdflush_threads_max, | ||
| 1058 | .maxlen = sizeof nr_pdflush_threads_max, | ||
| 1059 | .mode = 0644 /* read-write */, | ||
| 1060 | .proc_handler = &proc_dointvec_minmax, | ||
| 1061 | .strategy = &sysctl_intvec, | ||
| 1062 | .extra1 = &nr_pdflush_threads_min, | ||
| 1063 | .extra2 = &one_thousand, | ||
| 1064 | }, | ||
| 1065 | { | ||
| 1030 | .ctl_name = VM_SWAPPINESS, | 1066 | .ctl_name = VM_SWAPPINESS, |
| 1031 | .procname = "swappiness", | 1067 | .procname = "swappiness", |
| 1032 | .data = &vm_swappiness, | 1068 | .data = &vm_swappiness, |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 947c5b3f90c4..b32ff446c3fb 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
| @@ -327,10 +327,10 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | |||
| 327 | char *msg; | 327 | char *msg; |
| 328 | struct blk_trace *bt; | 328 | struct blk_trace *bt; |
| 329 | 329 | ||
| 330 | if (count > BLK_TN_MAX_MSG) | 330 | if (count >= BLK_TN_MAX_MSG) |
| 331 | return -EINVAL; | 331 | return -EINVAL; |
| 332 | 332 | ||
| 333 | msg = kmalloc(count, GFP_KERNEL); | 333 | msg = kmalloc(count + 1, GFP_KERNEL); |
| 334 | if (msg == NULL) | 334 | if (msg == NULL) |
| 335 | return -ENOMEM; | 335 | return -ENOMEM; |
| 336 | 336 | ||
| @@ -339,6 +339,7 @@ static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, | |||
| 339 | return -EFAULT; | 339 | return -EFAULT; |
| 340 | } | 340 | } |
| 341 | 341 | ||
| 342 | msg[count] = '\0'; | ||
| 342 | bt = filp->private_data; | 343 | bt = filp->private_data; |
| 343 | __trace_note_message(bt, "%s", msg); | 344 | __trace_note_message(bt, "%s", msg); |
| 344 | kfree(msg); | 345 | kfree(msg); |
| @@ -642,7 +643,7 @@ static void blk_add_trace_rq(struct request_queue *q, struct request *rq, | |||
| 642 | if (blk_pc_request(rq)) { | 643 | if (blk_pc_request(rq)) { |
| 643 | what |= BLK_TC_ACT(BLK_TC_PC); | 644 | what |= BLK_TC_ACT(BLK_TC_PC); |
| 644 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, | 645 | __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, |
| 645 | sizeof(rq->cmd), rq->cmd); | 646 | rq->cmd_len, rq->cmd); |
| 646 | } else { | 647 | } else { |
| 647 | what |= BLK_TC_ACT(BLK_TC_FS); | 648 | what |= BLK_TC_ACT(BLK_TC_FS); |
| 648 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, | 649 | __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a0174a40c563..9d28476a9851 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/percpu.h> | 30 | #include <linux/percpu.h> |
| 31 | #include <linux/splice.h> | 31 | #include <linux/splice.h> |
| 32 | #include <linux/kdebug.h> | 32 | #include <linux/kdebug.h> |
| 33 | #include <linux/string.h> | ||
| 33 | #include <linux/ctype.h> | 34 | #include <linux/ctype.h> |
| 34 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 35 | #include <linux/poll.h> | 36 | #include <linux/poll.h> |
| @@ -147,8 +148,7 @@ static int __init set_ftrace_dump_on_oops(char *str) | |||
| 147 | } | 148 | } |
| 148 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); | 149 | __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops); |
| 149 | 150 | ||
| 150 | long | 151 | unsigned long long ns2usecs(cycle_t nsec) |
| 151 | ns2usecs(cycle_t nsec) | ||
| 152 | { | 152 | { |
| 153 | nsec += 500; | 153 | nsec += 500; |
| 154 | do_div(nsec, 1000); | 154 | do_div(nsec, 1000); |
| @@ -1632,7 +1632,11 @@ static void test_cpu_buff_start(struct trace_iterator *iter) | |||
| 1632 | return; | 1632 | return; |
| 1633 | 1633 | ||
| 1634 | cpumask_set_cpu(iter->cpu, iter->started); | 1634 | cpumask_set_cpu(iter->cpu, iter->started); |
| 1635 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); | 1635 | |
| 1636 | /* Don't print started cpu buffer for the first entry of the trace */ | ||
| 1637 | if (iter->idx > 1) | ||
| 1638 | trace_seq_printf(s, "##### CPU %u buffer started ####\n", | ||
| 1639 | iter->cpu); | ||
| 1636 | } | 1640 | } |
| 1637 | 1641 | ||
| 1638 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) | 1642 | static enum print_line_t print_trace_fmt(struct trace_iterator *iter) |
| @@ -1867,6 +1871,11 @@ __tracing_open(struct inode *inode, struct file *file) | |||
| 1867 | if (current_trace) | 1871 | if (current_trace) |
| 1868 | *iter->trace = *current_trace; | 1872 | *iter->trace = *current_trace; |
| 1869 | 1873 | ||
| 1874 | if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) | ||
| 1875 | goto fail; | ||
| 1876 | |||
| 1877 | cpumask_clear(iter->started); | ||
| 1878 | |||
| 1870 | if (current_trace && current_trace->print_max) | 1879 | if (current_trace && current_trace->print_max) |
| 1871 | iter->tr = &max_tr; | 1880 | iter->tr = &max_tr; |
| 1872 | else | 1881 | else |
| @@ -1917,6 +1926,7 @@ __tracing_open(struct inode *inode, struct file *file) | |||
| 1917 | if (iter->buffer_iter[cpu]) | 1926 | if (iter->buffer_iter[cpu]) |
| 1918 | ring_buffer_read_finish(iter->buffer_iter[cpu]); | 1927 | ring_buffer_read_finish(iter->buffer_iter[cpu]); |
| 1919 | } | 1928 | } |
| 1929 | free_cpumask_var(iter->started); | ||
| 1920 | fail: | 1930 | fail: |
| 1921 | mutex_unlock(&trace_types_lock); | 1931 | mutex_unlock(&trace_types_lock); |
| 1922 | kfree(iter->trace); | 1932 | kfree(iter->trace); |
| @@ -1960,6 +1970,7 @@ static int tracing_release(struct inode *inode, struct file *file) | |||
| 1960 | 1970 | ||
| 1961 | seq_release(inode, file); | 1971 | seq_release(inode, file); |
| 1962 | mutex_destroy(&iter->mutex); | 1972 | mutex_destroy(&iter->mutex); |
| 1973 | free_cpumask_var(iter->started); | ||
| 1963 | kfree(iter->trace); | 1974 | kfree(iter->trace); |
| 1964 | kfree(iter); | 1975 | kfree(iter); |
| 1965 | return 0; | 1976 | return 0; |
| @@ -2358,9 +2369,9 @@ static const char readme_msg[] = | |||
| 2358 | "# mkdir /debug\n" | 2369 | "# mkdir /debug\n" |
| 2359 | "# mount -t debugfs nodev /debug\n\n" | 2370 | "# mount -t debugfs nodev /debug\n\n" |
| 2360 | "# cat /debug/tracing/available_tracers\n" | 2371 | "# cat /debug/tracing/available_tracers\n" |
| 2361 | "wakeup preemptirqsoff preemptoff irqsoff ftrace sched_switch none\n\n" | 2372 | "wakeup preemptirqsoff preemptoff irqsoff function sched_switch nop\n\n" |
| 2362 | "# cat /debug/tracing/current_tracer\n" | 2373 | "# cat /debug/tracing/current_tracer\n" |
| 2363 | "none\n" | 2374 | "nop\n" |
| 2364 | "# echo sched_switch > /debug/tracing/current_tracer\n" | 2375 | "# echo sched_switch > /debug/tracing/current_tracer\n" |
| 2365 | "# cat /debug/tracing/current_tracer\n" | 2376 | "# cat /debug/tracing/current_tracer\n" |
| 2366 | "sched_switch\n" | 2377 | "sched_switch\n" |
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index cbc168f1e43d..e685ac2b2ba1 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h | |||
| @@ -602,7 +602,7 @@ extern int trace_selftest_startup_branch(struct tracer *trace, | |||
| 602 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ | 602 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
| 603 | 603 | ||
| 604 | extern void *head_page(struct trace_array_cpu *data); | 604 | extern void *head_page(struct trace_array_cpu *data); |
| 605 | extern long ns2usecs(cycle_t nsec); | 605 | extern unsigned long long ns2usecs(cycle_t nsec); |
| 606 | extern int | 606 | extern int |
| 607 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); | 607 | trace_vbprintk(unsigned long ip, const char *fmt, va_list args); |
| 608 | extern int | 608 | extern int |
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 4d9952d3df50..07a22c33ebf3 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c | |||
| @@ -40,7 +40,7 @@ | |||
| 40 | 40 | ||
| 41 | #undef TRACE_FIELD_ZERO_CHAR | 41 | #undef TRACE_FIELD_ZERO_CHAR |
| 42 | #define TRACE_FIELD_ZERO_CHAR(item) \ | 42 | #define TRACE_FIELD_ZERO_CHAR(item) \ |
| 43 | ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ | 43 | ret = trace_seq_printf(s, "\tfield:char " #item ";\t" \ |
| 44 | "offset:%u;\tsize:0;\n", \ | 44 | "offset:%u;\tsize:0;\n", \ |
| 45 | (unsigned int)offsetof(typeof(field), item)); \ | 45 | (unsigned int)offsetof(typeof(field), item)); \ |
| 46 | if (!ret) \ | 46 | if (!ret) \ |
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index d72b9a63b247..64b54a59c55b 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c | |||
| @@ -423,7 +423,7 @@ int trace_print_lat_context(struct trace_iterator *iter) | |||
| 423 | 423 | ||
| 424 | trace_find_cmdline(entry->pid, comm); | 424 | trace_find_cmdline(entry->pid, comm); |
| 425 | 425 | ||
| 426 | ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" | 426 | ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]" |
| 427 | " %ld.%03ldms (+%ld.%03ldms): ", comm, | 427 | " %ld.%03ldms (+%ld.%03ldms): ", comm, |
| 428 | entry->pid, iter->cpu, entry->flags, | 428 | entry->pid, iter->cpu, entry->flags, |
| 429 | entry->preempt_count, iter->idx, | 429 | entry->preempt_count, iter->idx, |
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index de35f200abd3..9117cea6f1ae 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c | |||
| @@ -62,6 +62,9 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success) | |||
| 62 | pc = preempt_count(); | 62 | pc = preempt_count(); |
| 63 | tracing_record_cmdline(current); | 63 | tracing_record_cmdline(current); |
| 64 | 64 | ||
| 65 | if (sched_stopped) | ||
| 66 | return; | ||
| 67 | |||
| 65 | local_irq_save(flags); | 68 | local_irq_save(flags); |
| 66 | cpu = raw_smp_processor_id(); | 69 | cpu = raw_smp_processor_id(); |
| 67 | data = ctx_trace->data[cpu]; | 70 | data = ctx_trace->data[cpu]; |
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 3c5ad6b2ec84..5bc00e8f153e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c | |||
| @@ -154,7 +154,7 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev, | |||
| 154 | if (unlikely(!tracer_enabled || next != wakeup_task)) | 154 | if (unlikely(!tracer_enabled || next != wakeup_task)) |
| 155 | goto out_unlock; | 155 | goto out_unlock; |
| 156 | 156 | ||
| 157 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 157 | trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc); |
| 158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); | 158 | tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); |
| 159 | 159 | ||
| 160 | /* | 160 | /* |
| @@ -257,6 +257,12 @@ probe_wakeup(struct rq *rq, struct task_struct *p, int success) | |||
| 257 | data = wakeup_trace->data[wakeup_cpu]; | 257 | data = wakeup_trace->data[wakeup_cpu]; |
| 258 | data->preempt_timestamp = ftrace_now(cpu); | 258 | data->preempt_timestamp = ftrace_now(cpu); |
| 259 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); | 259 | tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); |
| 260 | |||
| 261 | /* | ||
| 262 | * We must be careful in using CALLER_ADDR2. But since wake_up | ||
| 263 | * is not called by an assembly function (where as schedule is) | ||
| 264 | * it should be safe to use it here. | ||
| 265 | */ | ||
| 260 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); | 266 | trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); |
| 261 | 267 | ||
| 262 | out_locked: | 268 | out_locked: |
