diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-04-10 06:46:28 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-04-10 06:46:51 -0400 |
commit | 1cad1252ed279ea59f3f8d3d3a5817eeb2f7a4d3 (patch) | |
tree | ec5af7a70f58ad27ad21fc27815ca164ccf92c36 /kernel | |
parent | dcef788eb9659b61a2110284fcce3ca6e63480d2 (diff) | |
parent | 93cfb3c9fd83d877a8f1ffad9ff862b617b32828 (diff) |
Merge branch 'tracing/urgent' into tracing/core
Merge reason: pick up both v2.6.30-rc1 [which includes tracing/urgent fixes]
and pick up the current lineup of tracing/urgent fixes as well
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/exit.c | 2 | ||||
-rw-r--r-- | kernel/fork.c | 21 | ||||
-rw-r--r-- | kernel/futex.c | 7 | ||||
-rw-r--r-- | kernel/hung_task.c | 217 | ||||
-rw-r--r-- | kernel/irq/devres.c | 16 | ||||
-rw-r--r-- | kernel/irq/handle.c | 50 | ||||
-rw-r--r-- | kernel/irq/manage.c | 189 | ||||
-rw-r--r-- | kernel/irq/numa_migrate.c | 1 | ||||
-rw-r--r-- | kernel/kprobes.c | 281 | ||||
-rw-r--r-- | kernel/kthread.c | 26 | ||||
-rw-r--r-- | kernel/module.c | 3 | ||||
-rw-r--r-- | kernel/posix-cpu-timers.c | 9 | ||||
-rw-r--r-- | kernel/ptrace.c | 16 | ||||
-rw-r--r-- | kernel/sched.c | 160 | ||||
-rw-r--r-- | kernel/sched_cpupri.c | 5 | ||||
-rw-r--r-- | kernel/sched_rt.c | 15 | ||||
-rw-r--r-- | kernel/softlockup.c | 100 | ||||
-rw-r--r-- | kernel/sysctl.c | 38 | ||||
-rw-r--r-- | kernel/timer.c | 7 | ||||
-rw-r--r-- | kernel/trace/blktrace.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace.c | 36 | ||||
-rw-r--r-- | kernel/trace/trace_syscalls.c | 2 | ||||
-rw-r--r-- | kernel/workqueue.c | 36 |
24 files changed, 939 insertions, 309 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index bab1dffe37e9..42423665660a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -74,6 +74,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o | |||
74 | obj-$(CONFIG_KPROBES) += kprobes.o | 74 | obj-$(CONFIG_KPROBES) += kprobes.o |
75 | obj-$(CONFIG_KGDB) += kgdb.o | 75 | obj-$(CONFIG_KGDB) += kgdb.o |
76 | obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o | 76 | obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o |
77 | obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o | ||
77 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ | 78 | obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ |
78 | obj-$(CONFIG_SECCOMP) += seccomp.o | 79 | obj-$(CONFIG_SECCOMP) += seccomp.o |
79 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | 80 | obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o |
diff --git a/kernel/exit.c b/kernel/exit.c index 32cbf2607cb0..abf9cf3b95c6 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -923,6 +923,8 @@ NORET_TYPE void do_exit(long code) | |||
923 | schedule(); | 923 | schedule(); |
924 | } | 924 | } |
925 | 925 | ||
926 | exit_irq_thread(); | ||
927 | |||
926 | exit_signals(tsk); /* sets PF_EXITING */ | 928 | exit_signals(tsk); /* sets PF_EXITING */ |
927 | /* | 929 | /* |
928 | * tsk->flags are checked in the futex code to protect against | 930 | * tsk->flags are checked in the futex code to protect against |
diff --git a/kernel/fork.c b/kernel/fork.c index 660c2b8765bc..b9e2edd00726 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -645,6 +645,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) | |||
645 | 645 | ||
646 | tsk->min_flt = tsk->maj_flt = 0; | 646 | tsk->min_flt = tsk->maj_flt = 0; |
647 | tsk->nvcsw = tsk->nivcsw = 0; | 647 | tsk->nvcsw = tsk->nivcsw = 0; |
648 | #ifdef CONFIG_DETECT_HUNG_TASK | ||
649 | tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; | ||
650 | #endif | ||
648 | 651 | ||
649 | tsk->mm = NULL; | 652 | tsk->mm = NULL; |
650 | tsk->active_mm = NULL; | 653 | tsk->active_mm = NULL; |
@@ -797,6 +800,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) | |||
797 | sig->cputime_expires.virt_exp = cputime_zero; | 800 | sig->cputime_expires.virt_exp = cputime_zero; |
798 | sig->cputime_expires.sched_exp = 0; | 801 | sig->cputime_expires.sched_exp = 0; |
799 | 802 | ||
803 | if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { | ||
804 | sig->cputime_expires.prof_exp = | ||
805 | secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); | ||
806 | sig->cputimer.running = 1; | ||
807 | } | ||
808 | |||
800 | /* The timer lists. */ | 809 | /* The timer lists. */ |
801 | INIT_LIST_HEAD(&sig->cpu_timers[0]); | 810 | INIT_LIST_HEAD(&sig->cpu_timers[0]); |
802 | INIT_LIST_HEAD(&sig->cpu_timers[1]); | 811 | INIT_LIST_HEAD(&sig->cpu_timers[1]); |
@@ -812,11 +821,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
812 | atomic_inc(&current->signal->live); | 821 | atomic_inc(&current->signal->live); |
813 | return 0; | 822 | return 0; |
814 | } | 823 | } |
815 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | ||
816 | |||
817 | if (sig) | ||
818 | posix_cpu_timers_init_group(sig); | ||
819 | 824 | ||
825 | sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); | ||
820 | tsk->signal = sig; | 826 | tsk->signal = sig; |
821 | if (!sig) | 827 | if (!sig) |
822 | return -ENOMEM; | 828 | return -ENOMEM; |
@@ -856,6 +862,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) | |||
856 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); | 862 | memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); |
857 | task_unlock(current->group_leader); | 863 | task_unlock(current->group_leader); |
858 | 864 | ||
865 | posix_cpu_timers_init_group(sig); | ||
866 | |||
859 | acct_init_pacct(&sig->pacct); | 867 | acct_init_pacct(&sig->pacct); |
860 | 868 | ||
861 | tty_audit_fork(sig); | 869 | tty_audit_fork(sig); |
@@ -1032,11 +1040,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, | |||
1032 | 1040 | ||
1033 | p->default_timer_slack_ns = current->timer_slack_ns; | 1041 | p->default_timer_slack_ns = current->timer_slack_ns; |
1034 | 1042 | ||
1035 | #ifdef CONFIG_DETECT_SOFTLOCKUP | ||
1036 | p->last_switch_count = 0; | ||
1037 | p->last_switch_timestamp = 0; | ||
1038 | #endif | ||
1039 | |||
1040 | task_io_accounting_init(&p->ioac); | 1043 | task_io_accounting_init(&p->ioac); |
1041 | acct_clear_integrals(p); | 1044 | acct_clear_integrals(p); |
1042 | 1045 | ||
diff --git a/kernel/futex.c b/kernel/futex.c index 6b50a024bca2..eef8cd26b5e5 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -883,7 +883,12 @@ retry_private: | |||
883 | out_unlock: | 883 | out_unlock: |
884 | double_unlock_hb(hb1, hb2); | 884 | double_unlock_hb(hb1, hb2); |
885 | 885 | ||
886 | /* drop_futex_key_refs() must be called outside the spinlocks. */ | 886 | /* |
887 | * drop_futex_key_refs() must be called outside the spinlocks. During | ||
888 | * the requeue we moved futex_q's from the hash bucket at key1 to the | ||
889 | * one at key2 and updated their key pointer. We no longer need to | ||
890 | * hold the references to key1. | ||
891 | */ | ||
887 | while (--drop_count >= 0) | 892 | while (--drop_count >= 0) |
888 | drop_futex_key_refs(&key1); | 893 | drop_futex_key_refs(&key1); |
889 | 894 | ||
diff --git a/kernel/hung_task.c b/kernel/hung_task.c new file mode 100644 index 000000000000..022a4927b785 --- /dev/null +++ b/kernel/hung_task.c | |||
@@ -0,0 +1,217 @@ | |||
1 | /* | ||
2 | * Detect Hung Task | ||
3 | * | ||
4 | * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state | ||
5 | * | ||
6 | */ | ||
7 | |||
8 | #include <linux/mm.h> | ||
9 | #include <linux/cpu.h> | ||
10 | #include <linux/nmi.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/delay.h> | ||
13 | #include <linux/freezer.h> | ||
14 | #include <linux/kthread.h> | ||
15 | #include <linux/lockdep.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/sysctl.h> | ||
18 | |||
19 | /* | ||
20 | * The number of tasks checked: | ||
21 | */ | ||
22 | unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; | ||
23 | |||
24 | /* | ||
25 | * Limit number of tasks checked in a batch. | ||
26 | * | ||
27 | * This value controls the preemptibility of khungtaskd since preemption | ||
28 | * is disabled during the critical section. It also controls the size of | ||
29 | * the RCU grace period. So it needs to be upper-bound. | ||
30 | */ | ||
31 | #define HUNG_TASK_BATCHING 1024 | ||
32 | |||
33 | /* | ||
34 | * Zero means infinite timeout - no checking done: | ||
35 | */ | ||
36 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; | ||
37 | |||
38 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
39 | |||
40 | static int __read_mostly did_panic; | ||
41 | |||
42 | static struct task_struct *watchdog_task; | ||
43 | |||
44 | /* | ||
45 | * Should we panic (and reboot, if panic_timeout= is set) when a | ||
46 | * hung task is detected: | ||
47 | */ | ||
48 | unsigned int __read_mostly sysctl_hung_task_panic = | ||
49 | CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; | ||
50 | |||
51 | static int __init hung_task_panic_setup(char *str) | ||
52 | { | ||
53 | sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); | ||
54 | |||
55 | return 1; | ||
56 | } | ||
57 | __setup("hung_task_panic=", hung_task_panic_setup); | ||
58 | |||
59 | static int | ||
60 | hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) | ||
61 | { | ||
62 | did_panic = 1; | ||
63 | |||
64 | return NOTIFY_DONE; | ||
65 | } | ||
66 | |||
67 | static struct notifier_block panic_block = { | ||
68 | .notifier_call = hung_task_panic, | ||
69 | }; | ||
70 | |||
71 | static void check_hung_task(struct task_struct *t, unsigned long timeout) | ||
72 | { | ||
73 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
74 | |||
75 | /* | ||
76 | * Ensure the task is not frozen. | ||
77 | * Also, when a freshly created task is scheduled once, changes | ||
78 | * its state to TASK_UNINTERRUPTIBLE without having ever been | ||
79 | * switched out once, it mustn't be checked. | ||
80 | */ | ||
81 | if (unlikely(t->flags & PF_FROZEN || !switch_count)) | ||
82 | return; | ||
83 | |||
84 | if (switch_count != t->last_switch_count) { | ||
85 | t->last_switch_count = switch_count; | ||
86 | return; | ||
87 | } | ||
88 | if (!sysctl_hung_task_warnings) | ||
89 | return; | ||
90 | sysctl_hung_task_warnings--; | ||
91 | |||
92 | /* | ||
93 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
94 | * complain: | ||
95 | */ | ||
96 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
97 | "%ld seconds.\n", t->comm, t->pid, timeout); | ||
98 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
99 | " disables this message.\n"); | ||
100 | sched_show_task(t); | ||
101 | __debug_show_held_locks(t); | ||
102 | |||
103 | touch_nmi_watchdog(); | ||
104 | |||
105 | if (sysctl_hung_task_panic) | ||
106 | panic("hung_task: blocked tasks"); | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * To avoid extending the RCU grace period for an unbounded amount of time, | ||
111 | * periodically exit the critical section and enter a new one. | ||
112 | * | ||
113 | * For preemptible RCU it is sufficient to call rcu_read_unlock in order | ||
114 | * to exit the grace period. For classic RCU, a reschedule is required. | ||
115 | */ | ||
116 | static void rcu_lock_break(struct task_struct *g, struct task_struct *t) | ||
117 | { | ||
118 | get_task_struct(g); | ||
119 | get_task_struct(t); | ||
120 | rcu_read_unlock(); | ||
121 | cond_resched(); | ||
122 | rcu_read_lock(); | ||
123 | put_task_struct(t); | ||
124 | put_task_struct(g); | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
129 | * a really long time (120 seconds). If that happens, print out | ||
130 | * a warning. | ||
131 | */ | ||
132 | static void check_hung_uninterruptible_tasks(unsigned long timeout) | ||
133 | { | ||
134 | int max_count = sysctl_hung_task_check_count; | ||
135 | int batch_count = HUNG_TASK_BATCHING; | ||
136 | struct task_struct *g, *t; | ||
137 | |||
138 | /* | ||
139 | * If the system crashed already then all bets are off, | ||
140 | * do not report extra hung tasks: | ||
141 | */ | ||
142 | if (test_taint(TAINT_DIE) || did_panic) | ||
143 | return; | ||
144 | |||
145 | rcu_read_lock(); | ||
146 | do_each_thread(g, t) { | ||
147 | if (!--max_count) | ||
148 | goto unlock; | ||
149 | if (!--batch_count) { | ||
150 | batch_count = HUNG_TASK_BATCHING; | ||
151 | rcu_lock_break(g, t); | ||
152 | /* Exit if t or g was unhashed during refresh. */ | ||
153 | if (t->state == TASK_DEAD || g->state == TASK_DEAD) | ||
154 | goto unlock; | ||
155 | } | ||
156 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
157 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
158 | check_hung_task(t, timeout); | ||
159 | } while_each_thread(g, t); | ||
160 | unlock: | ||
161 | rcu_read_unlock(); | ||
162 | } | ||
163 | |||
164 | static unsigned long timeout_jiffies(unsigned long timeout) | ||
165 | { | ||
166 | /* timeout of 0 will disable the watchdog */ | ||
167 | return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Process updating of timeout sysctl | ||
172 | */ | ||
173 | int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, | ||
174 | struct file *filp, void __user *buffer, | ||
175 | size_t *lenp, loff_t *ppos) | ||
176 | { | ||
177 | int ret; | ||
178 | |||
179 | ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
180 | |||
181 | if (ret || !write) | ||
182 | goto out; | ||
183 | |||
184 | wake_up_process(watchdog_task); | ||
185 | |||
186 | out: | ||
187 | return ret; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * kthread which checks for tasks stuck in D state | ||
192 | */ | ||
193 | static int watchdog(void *dummy) | ||
194 | { | ||
195 | set_user_nice(current, 0); | ||
196 | |||
197 | for ( ; ; ) { | ||
198 | unsigned long timeout = sysctl_hung_task_timeout_secs; | ||
199 | |||
200 | while (schedule_timeout_interruptible(timeout_jiffies(timeout))) | ||
201 | timeout = sysctl_hung_task_timeout_secs; | ||
202 | |||
203 | check_hung_uninterruptible_tasks(timeout); | ||
204 | } | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static int __init hung_task_init(void) | ||
210 | { | ||
211 | atomic_notifier_chain_register(&panic_notifier_list, &panic_block); | ||
212 | watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); | ||
213 | |||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | module_init(hung_task_init); | ||
diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 38a25b8d8bff..d06df9c41cba 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c | |||
@@ -26,10 +26,12 @@ static int devm_irq_match(struct device *dev, void *res, void *data) | |||
26 | } | 26 | } |
27 | 27 | ||
28 | /** | 28 | /** |
29 | * devm_request_irq - allocate an interrupt line for a managed device | 29 | * devm_request_threaded_irq - allocate an interrupt line for a managed device |
30 | * @dev: device to request interrupt for | 30 | * @dev: device to request interrupt for |
31 | * @irq: Interrupt line to allocate | 31 | * @irq: Interrupt line to allocate |
32 | * @handler: Function to be called when the IRQ occurs | 32 | * @handler: Function to be called when the IRQ occurs |
33 | * @thread_fn: function to be called in a threaded interrupt context. NULL | ||
34 | * for devices which handle everything in @handler | ||
33 | * @irqflags: Interrupt type flags | 35 | * @irqflags: Interrupt type flags |
34 | * @devname: An ascii name for the claiming device | 36 | * @devname: An ascii name for the claiming device |
35 | * @dev_id: A cookie passed back to the handler function | 37 | * @dev_id: A cookie passed back to the handler function |
@@ -42,9 +44,10 @@ static int devm_irq_match(struct device *dev, void *res, void *data) | |||
42 | * If an IRQ allocated with this function needs to be freed | 44 | * If an IRQ allocated with this function needs to be freed |
43 | * separately, devm_free_irq() must be used. | 45 | * separately, devm_free_irq() must be used. |
44 | */ | 46 | */ |
45 | int devm_request_irq(struct device *dev, unsigned int irq, | 47 | int devm_request_threaded_irq(struct device *dev, unsigned int irq, |
46 | irq_handler_t handler, unsigned long irqflags, | 48 | irq_handler_t handler, irq_handler_t thread_fn, |
47 | const char *devname, void *dev_id) | 49 | unsigned long irqflags, const char *devname, |
50 | void *dev_id) | ||
48 | { | 51 | { |
49 | struct irq_devres *dr; | 52 | struct irq_devres *dr; |
50 | int rc; | 53 | int rc; |
@@ -54,7 +57,8 @@ int devm_request_irq(struct device *dev, unsigned int irq, | |||
54 | if (!dr) | 57 | if (!dr) |
55 | return -ENOMEM; | 58 | return -ENOMEM; |
56 | 59 | ||
57 | rc = request_irq(irq, handler, irqflags, devname, dev_id); | 60 | rc = request_threaded_irq(irq, handler, thread_fn, irqflags, devname, |
61 | dev_id); | ||
58 | if (rc) { | 62 | if (rc) { |
59 | devres_free(dr); | 63 | devres_free(dr); |
60 | return rc; | 64 | return rc; |
@@ -66,7 +70,7 @@ int devm_request_irq(struct device *dev, unsigned int irq, | |||
66 | 70 | ||
67 | return 0; | 71 | return 0; |
68 | } | 72 | } |
69 | EXPORT_SYMBOL(devm_request_irq); | 73 | EXPORT_SYMBOL(devm_request_threaded_irq); |
70 | 74 | ||
71 | /** | 75 | /** |
72 | * devm_free_irq - free an interrupt | 76 | * devm_free_irq - free an interrupt |
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 343acecae629..d82142be8dd2 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c | |||
@@ -339,6 +339,15 @@ irqreturn_t no_action(int cpl, void *dev_id) | |||
339 | return IRQ_NONE; | 339 | return IRQ_NONE; |
340 | } | 340 | } |
341 | 341 | ||
342 | static void warn_no_thread(unsigned int irq, struct irqaction *action) | ||
343 | { | ||
344 | if (test_and_set_bit(IRQTF_WARNED, &action->thread_flags)) | ||
345 | return; | ||
346 | |||
347 | printk(KERN_WARNING "IRQ %d device %s returned IRQ_WAKE_THREAD " | ||
348 | "but no thread function available.", irq, action->name); | ||
349 | } | ||
350 | |||
342 | DEFINE_TRACE(irq_handler_entry); | 351 | DEFINE_TRACE(irq_handler_entry); |
343 | DEFINE_TRACE(irq_handler_exit); | 352 | DEFINE_TRACE(irq_handler_exit); |
344 | 353 | ||
@@ -363,8 +372,47 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) | |||
363 | trace_irq_handler_entry(irq, action); | 372 | trace_irq_handler_entry(irq, action); |
364 | ret = action->handler(irq, action->dev_id); | 373 | ret = action->handler(irq, action->dev_id); |
365 | trace_irq_handler_exit(irq, action, ret); | 374 | trace_irq_handler_exit(irq, action, ret); |
366 | if (ret == IRQ_HANDLED) | 375 | |
376 | switch (ret) { | ||
377 | case IRQ_WAKE_THREAD: | ||
378 | /* | ||
379 | * Set result to handled so the spurious check | ||
380 | * does not trigger. | ||
381 | */ | ||
382 | ret = IRQ_HANDLED; | ||
383 | |||
384 | /* | ||
385 | * Catch drivers which return WAKE_THREAD but | ||
386 | * did not set up a thread function | ||
387 | */ | ||
388 | if (unlikely(!action->thread_fn)) { | ||
389 | warn_no_thread(irq, action); | ||
390 | break; | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * Wake up the handler thread for this | ||
395 | * action. In case the thread crashed and was | ||
396 | * killed we just pretend that we handled the | ||
397 | * interrupt. The hardirq handler above has | ||
398 | * disabled the device interrupt, so no irq | ||
399 | * storm is lurking. | ||
400 | */ | ||
401 | if (likely(!test_bit(IRQTF_DIED, | ||
402 | &action->thread_flags))) { | ||
403 | set_bit(IRQTF_RUNTHREAD, &action->thread_flags); | ||
404 | wake_up_process(action->thread); | ||
405 | } | ||
406 | |||
407 | /* Fall through to add to randomness */ | ||
408 | case IRQ_HANDLED: | ||
367 | status |= action->flags; | 409 | status |= action->flags; |
410 | break; | ||
411 | |||
412 | default: | ||
413 | break; | ||
414 | } | ||
415 | |||
368 | retval |= ret; | 416 | retval |= ret; |
369 | action = action->next; | 417 | action = action->next; |
370 | } while (action); | 418 | } while (action); |
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 1516ab77355c..7e2e7dd4cd2f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c | |||
@@ -8,16 +8,15 @@ | |||
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/irq.h> | 10 | #include <linux/irq.h> |
11 | #include <linux/kthread.h> | ||
11 | #include <linux/module.h> | 12 | #include <linux/module.h> |
12 | #include <linux/random.h> | 13 | #include <linux/random.h> |
13 | #include <linux/interrupt.h> | 14 | #include <linux/interrupt.h> |
14 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/sched.h> | ||
15 | 17 | ||
16 | #include "internals.h" | 18 | #include "internals.h" |
17 | 19 | ||
18 | #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS) | ||
19 | cpumask_var_t irq_default_affinity; | ||
20 | |||
21 | /** | 20 | /** |
22 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) | 21 | * synchronize_irq - wait for pending IRQ handlers (on other CPUs) |
23 | * @irq: interrupt number to wait for | 22 | * @irq: interrupt number to wait for |
@@ -53,9 +52,18 @@ void synchronize_irq(unsigned int irq) | |||
53 | 52 | ||
54 | /* Oops, that failed? */ | 53 | /* Oops, that failed? */ |
55 | } while (status & IRQ_INPROGRESS); | 54 | } while (status & IRQ_INPROGRESS); |
55 | |||
56 | /* | ||
57 | * We made sure that no hardirq handler is running. Now verify | ||
58 | * that no threaded handlers are active. | ||
59 | */ | ||
60 | wait_event(desc->wait_for_threads, !atomic_read(&desc->threads_active)); | ||
56 | } | 61 | } |
57 | EXPORT_SYMBOL(synchronize_irq); | 62 | EXPORT_SYMBOL(synchronize_irq); |
58 | 63 | ||
64 | #ifdef CONFIG_SMP | ||
65 | cpumask_var_t irq_default_affinity; | ||
66 | |||
59 | /** | 67 | /** |
60 | * irq_can_set_affinity - Check if the affinity of a given irq can be set | 68 | * irq_can_set_affinity - Check if the affinity of a given irq can be set |
61 | * @irq: Interrupt to check | 69 | * @irq: Interrupt to check |
@@ -72,6 +80,18 @@ int irq_can_set_affinity(unsigned int irq) | |||
72 | return 1; | 80 | return 1; |
73 | } | 81 | } |
74 | 82 | ||
83 | static void | ||
84 | irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask) | ||
85 | { | ||
86 | struct irqaction *action = desc->action; | ||
87 | |||
88 | while (action) { | ||
89 | if (action->thread) | ||
90 | set_cpus_allowed_ptr(action->thread, cpumask); | ||
91 | action = action->next; | ||
92 | } | ||
93 | } | ||
94 | |||
75 | /** | 95 | /** |
76 | * irq_set_affinity - Set the irq affinity of a given irq | 96 | * irq_set_affinity - Set the irq affinity of a given irq |
77 | * @irq: Interrupt to set affinity | 97 | * @irq: Interrupt to set affinity |
@@ -100,6 +120,7 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask) | |||
100 | cpumask_copy(desc->affinity, cpumask); | 120 | cpumask_copy(desc->affinity, cpumask); |
101 | desc->chip->set_affinity(irq, cpumask); | 121 | desc->chip->set_affinity(irq, cpumask); |
102 | #endif | 122 | #endif |
123 | irq_set_thread_affinity(desc, cpumask); | ||
103 | desc->status |= IRQ_AFFINITY_SET; | 124 | desc->status |= IRQ_AFFINITY_SET; |
104 | spin_unlock_irqrestore(&desc->lock, flags); | 125 | spin_unlock_irqrestore(&desc->lock, flags); |
105 | return 0; | 126 | return 0; |
@@ -150,6 +171,8 @@ int irq_select_affinity_usr(unsigned int irq) | |||
150 | 171 | ||
151 | spin_lock_irqsave(&desc->lock, flags); | 172 | spin_lock_irqsave(&desc->lock, flags); |
152 | ret = setup_affinity(irq, desc); | 173 | ret = setup_affinity(irq, desc); |
174 | if (!ret) | ||
175 | irq_set_thread_affinity(desc, desc->affinity); | ||
153 | spin_unlock_irqrestore(&desc->lock, flags); | 176 | spin_unlock_irqrestore(&desc->lock, flags); |
154 | 177 | ||
155 | return ret; | 178 | return ret; |
@@ -401,6 +424,90 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq, | |||
401 | return ret; | 424 | return ret; |
402 | } | 425 | } |
403 | 426 | ||
427 | static int irq_wait_for_interrupt(struct irqaction *action) | ||
428 | { | ||
429 | while (!kthread_should_stop()) { | ||
430 | set_current_state(TASK_INTERRUPTIBLE); | ||
431 | |||
432 | if (test_and_clear_bit(IRQTF_RUNTHREAD, | ||
433 | &action->thread_flags)) { | ||
434 | __set_current_state(TASK_RUNNING); | ||
435 | return 0; | ||
436 | } | ||
437 | schedule(); | ||
438 | } | ||
439 | return -1; | ||
440 | } | ||
441 | |||
442 | /* | ||
443 | * Interrupt handler thread | ||
444 | */ | ||
445 | static int irq_thread(void *data) | ||
446 | { | ||
447 | struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; | ||
448 | struct irqaction *action = data; | ||
449 | struct irq_desc *desc = irq_to_desc(action->irq); | ||
450 | int wake; | ||
451 | |||
452 | sched_setscheduler(current, SCHED_FIFO, &param); | ||
453 | current->irqaction = action; | ||
454 | |||
455 | while (!irq_wait_for_interrupt(action)) { | ||
456 | |||
457 | atomic_inc(&desc->threads_active); | ||
458 | |||
459 | spin_lock_irq(&desc->lock); | ||
460 | if (unlikely(desc->status & IRQ_DISABLED)) { | ||
461 | /* | ||
462 | * CHECKME: We might need a dedicated | ||
463 | * IRQ_THREAD_PENDING flag here, which | ||
464 | * retriggers the thread in check_irq_resend() | ||
465 | * but AFAICT IRQ_PENDING should be fine as it | ||
466 | * retriggers the interrupt itself --- tglx | ||
467 | */ | ||
468 | desc->status |= IRQ_PENDING; | ||
469 | spin_unlock_irq(&desc->lock); | ||
470 | } else { | ||
471 | spin_unlock_irq(&desc->lock); | ||
472 | |||
473 | action->thread_fn(action->irq, action->dev_id); | ||
474 | } | ||
475 | |||
476 | wake = atomic_dec_and_test(&desc->threads_active); | ||
477 | |||
478 | if (wake && waitqueue_active(&desc->wait_for_threads)) | ||
479 | wake_up(&desc->wait_for_threads); | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * Clear irqaction. Otherwise exit_irq_thread() would make | ||
484 | * fuzz about an active irq thread going into nirvana. | ||
485 | */ | ||
486 | current->irqaction = NULL; | ||
487 | return 0; | ||
488 | } | ||
489 | |||
490 | /* | ||
491 | * Called from do_exit() | ||
492 | */ | ||
493 | void exit_irq_thread(void) | ||
494 | { | ||
495 | struct task_struct *tsk = current; | ||
496 | |||
497 | if (!tsk->irqaction) | ||
498 | return; | ||
499 | |||
500 | printk(KERN_ERR | ||
501 | "exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n", | ||
502 | tsk->comm ? tsk->comm : "", tsk->pid, tsk->irqaction->irq); | ||
503 | |||
504 | /* | ||
505 | * Set the THREAD DIED flag to prevent further wakeups of the | ||
506 | * soon to be gone threaded handler. | ||
507 | */ | ||
508 | set_bit(IRQTF_DIED, &tsk->irqaction->flags); | ||
509 | } | ||
510 | |||
404 | /* | 511 | /* |
405 | * Internal function to register an irqaction - typically used to | 512 | * Internal function to register an irqaction - typically used to |
406 | * allocate special interrupts that are part of the architecture. | 513 | * allocate special interrupts that are part of the architecture. |
@@ -437,6 +544,26 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
437 | } | 544 | } |
438 | 545 | ||
439 | /* | 546 | /* |
547 | * Threaded handler ? | ||
548 | */ | ||
549 | if (new->thread_fn) { | ||
550 | struct task_struct *t; | ||
551 | |||
552 | t = kthread_create(irq_thread, new, "irq/%d-%s", irq, | ||
553 | new->name); | ||
554 | if (IS_ERR(t)) | ||
555 | return PTR_ERR(t); | ||
556 | /* | ||
557 | * We keep the reference to the task struct even if | ||
558 | * the thread dies to avoid that the interrupt code | ||
559 | * references an already freed task_struct. | ||
560 | */ | ||
561 | get_task_struct(t); | ||
562 | new->thread = t; | ||
563 | wake_up_process(t); | ||
564 | } | ||
565 | |||
566 | /* | ||
440 | * The following block of code has to be executed atomically | 567 | * The following block of code has to be executed atomically |
441 | */ | 568 | */ |
442 | spin_lock_irqsave(&desc->lock, flags); | 569 | spin_lock_irqsave(&desc->lock, flags); |
@@ -473,15 +600,15 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) | |||
473 | if (!shared) { | 600 | if (!shared) { |
474 | irq_chip_set_defaults(desc->chip); | 601 | irq_chip_set_defaults(desc->chip); |
475 | 602 | ||
603 | init_waitqueue_head(&desc->wait_for_threads); | ||
604 | |||
476 | /* Setup the type (level, edge polarity) if configured: */ | 605 | /* Setup the type (level, edge polarity) if configured: */ |
477 | if (new->flags & IRQF_TRIGGER_MASK) { | 606 | if (new->flags & IRQF_TRIGGER_MASK) { |
478 | ret = __irq_set_trigger(desc, irq, | 607 | ret = __irq_set_trigger(desc, irq, |
479 | new->flags & IRQF_TRIGGER_MASK); | 608 | new->flags & IRQF_TRIGGER_MASK); |
480 | 609 | ||
481 | if (ret) { | 610 | if (ret) |
482 | spin_unlock_irqrestore(&desc->lock, flags); | 611 | goto out_thread; |
483 | return ret; | ||
484 | } | ||
485 | } else | 612 | } else |
486 | compat_irq_chip_set_default_handler(desc); | 613 | compat_irq_chip_set_default_handler(desc); |
487 | #if defined(CONFIG_IRQ_PER_CPU) | 614 | #if defined(CONFIG_IRQ_PER_CPU) |
@@ -549,8 +676,19 @@ mismatch: | |||
549 | dump_stack(); | 676 | dump_stack(); |
550 | } | 677 | } |
551 | #endif | 678 | #endif |
679 | ret = -EBUSY; | ||
680 | |||
681 | out_thread: | ||
552 | spin_unlock_irqrestore(&desc->lock, flags); | 682 | spin_unlock_irqrestore(&desc->lock, flags); |
553 | return -EBUSY; | 683 | if (new->thread) { |
684 | struct task_struct *t = new->thread; | ||
685 | |||
686 | new->thread = NULL; | ||
687 | if (likely(!test_bit(IRQTF_DIED, &new->thread_flags))) | ||
688 | kthread_stop(t); | ||
689 | put_task_struct(t); | ||
690 | } | ||
691 | return ret; | ||
554 | } | 692 | } |
555 | 693 | ||
556 | /** | 694 | /** |
@@ -576,6 +714,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
576 | { | 714 | { |
577 | struct irq_desc *desc = irq_to_desc(irq); | 715 | struct irq_desc *desc = irq_to_desc(irq); |
578 | struct irqaction *action, **action_ptr; | 716 | struct irqaction *action, **action_ptr; |
717 | struct task_struct *irqthread; | ||
579 | unsigned long flags; | 718 | unsigned long flags; |
580 | 719 | ||
581 | WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); | 720 | WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); |
@@ -622,6 +761,10 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
622 | else | 761 | else |
623 | desc->chip->disable(irq); | 762 | desc->chip->disable(irq); |
624 | } | 763 | } |
764 | |||
765 | irqthread = action->thread; | ||
766 | action->thread = NULL; | ||
767 | |||
625 | spin_unlock_irqrestore(&desc->lock, flags); | 768 | spin_unlock_irqrestore(&desc->lock, flags); |
626 | 769 | ||
627 | unregister_handler_proc(irq, action); | 770 | unregister_handler_proc(irq, action); |
@@ -629,6 +772,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) | |||
629 | /* Make sure it's not being used on another CPU: */ | 772 | /* Make sure it's not being used on another CPU: */ |
630 | synchronize_irq(irq); | 773 | synchronize_irq(irq); |
631 | 774 | ||
775 | if (irqthread) { | ||
776 | if (!test_bit(IRQTF_DIED, &action->thread_flags)) | ||
777 | kthread_stop(irqthread); | ||
778 | put_task_struct(irqthread); | ||
779 | } | ||
780 | |||
632 | #ifdef CONFIG_DEBUG_SHIRQ | 781 | #ifdef CONFIG_DEBUG_SHIRQ |
633 | /* | 782 | /* |
634 | * It's a shared IRQ -- the driver ought to be prepared for an IRQ | 783 | * It's a shared IRQ -- the driver ought to be prepared for an IRQ |
@@ -681,9 +830,12 @@ void free_irq(unsigned int irq, void *dev_id) | |||
681 | EXPORT_SYMBOL(free_irq); | 830 | EXPORT_SYMBOL(free_irq); |
682 | 831 | ||
683 | /** | 832 | /** |
684 | * request_irq - allocate an interrupt line | 833 | * request_threaded_irq - allocate an interrupt line |
685 | * @irq: Interrupt line to allocate | 834 | * @irq: Interrupt line to allocate |
686 | * @handler: Function to be called when the IRQ occurs | 835 | * @handler: Function to be called when the IRQ occurs. |
836 | * Primary handler for threaded interrupts | ||
837 | * @thread_fn: Function called from the irq handler thread | ||
838 | * If NULL, no irq thread is created | ||
687 | * @irqflags: Interrupt type flags | 839 | * @irqflags: Interrupt type flags |
688 | * @devname: An ascii name for the claiming device | 840 | * @devname: An ascii name for the claiming device |
689 | * @dev_id: A cookie passed back to the handler function | 841 | * @dev_id: A cookie passed back to the handler function |
@@ -695,6 +847,15 @@ EXPORT_SYMBOL(free_irq); | |||
695 | * raises, you must take care both to initialise your hardware | 847 | * raises, you must take care both to initialise your hardware |
696 | * and to set up the interrupt handler in the right order. | 848 | * and to set up the interrupt handler in the right order. |
697 | * | 849 | * |
850 | * If you want to set up a threaded irq handler for your device | ||
851 | * then you need to supply @handler and @thread_fn. @handler is | ||
852 | * still called in hard interrupt context and has to check | ||
853 | * whether the interrupt originates from the device. If yes it | ||
854 | * needs to disable the interrupt on the device and return | ||
855 | * IRQ_WAKE_THREAD which will wake up the handler thread and run | ||
856 | * @thread_fn. This split handler design is necessary to support | ||
857 | * shared interrupts. | ||
858 | * | ||
698 | * Dev_id must be globally unique. Normally the address of the | 859 | * Dev_id must be globally unique. Normally the address of the |
699 | * device data structure is used as the cookie. Since the handler | 860 | * device data structure is used as the cookie. Since the handler |
700 | * receives this value it makes sense to use it. | 861 | * receives this value it makes sense to use it. |
@@ -710,8 +871,9 @@ EXPORT_SYMBOL(free_irq); | |||
710 | * IRQF_TRIGGER_* Specify active edge(s) or level | 871 | * IRQF_TRIGGER_* Specify active edge(s) or level |
711 | * | 872 | * |
712 | */ | 873 | */ |
713 | int request_irq(unsigned int irq, irq_handler_t handler, | 874 | int request_threaded_irq(unsigned int irq, irq_handler_t handler, |
714 | unsigned long irqflags, const char *devname, void *dev_id) | 875 | irq_handler_t thread_fn, unsigned long irqflags, |
876 | const char *devname, void *dev_id) | ||
715 | { | 877 | { |
716 | struct irqaction *action; | 878 | struct irqaction *action; |
717 | struct irq_desc *desc; | 879 | struct irq_desc *desc; |
@@ -759,6 +921,7 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
759 | return -ENOMEM; | 921 | return -ENOMEM; |
760 | 922 | ||
761 | action->handler = handler; | 923 | action->handler = handler; |
924 | action->thread_fn = thread_fn; | ||
762 | action->flags = irqflags; | 925 | action->flags = irqflags; |
763 | action->name = devname; | 926 | action->name = devname; |
764 | action->dev_id = dev_id; | 927 | action->dev_id = dev_id; |
@@ -788,4 +951,4 @@ int request_irq(unsigned int irq, irq_handler_t handler, | |||
788 | #endif | 951 | #endif |
789 | return retval; | 952 | return retval; |
790 | } | 953 | } |
791 | EXPORT_SYMBOL(request_irq); | 954 | EXPORT_SYMBOL(request_threaded_irq); |
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 243d6121e50e..44bbdcbaf8d2 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c | |||
@@ -54,6 +54,7 @@ static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, | |||
54 | static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) | 54 | static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) |
55 | { | 55 | { |
56 | free_kstat_irqs(old_desc, desc); | 56 | free_kstat_irqs(old_desc, desc); |
57 | free_desc_masks(old_desc, desc); | ||
57 | arch_free_chip_data(old_desc, desc); | 58 | arch_free_chip_data(old_desc, desc); |
58 | } | 59 | } |
59 | 60 | ||
diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 5016bfb682b9..a5e74ddee0e2 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c | |||
@@ -68,7 +68,7 @@ static struct hlist_head kprobe_table[KPROBE_TABLE_SIZE]; | |||
68 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; | 68 | static struct hlist_head kretprobe_inst_table[KPROBE_TABLE_SIZE]; |
69 | 69 | ||
70 | /* NOTE: change this value only with kprobe_mutex held */ | 70 | /* NOTE: change this value only with kprobe_mutex held */ |
71 | static bool kprobe_enabled; | 71 | static bool kprobes_all_disarmed; |
72 | 72 | ||
73 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ | 73 | static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ |
74 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; | 74 | static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; |
@@ -328,7 +328,7 @@ static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
328 | struct kprobe *kp; | 328 | struct kprobe *kp; |
329 | 329 | ||
330 | list_for_each_entry_rcu(kp, &p->list, list) { | 330 | list_for_each_entry_rcu(kp, &p->list, list) { |
331 | if (kp->pre_handler && !kprobe_gone(kp)) { | 331 | if (kp->pre_handler && likely(!kprobe_disabled(kp))) { |
332 | set_kprobe_instance(kp); | 332 | set_kprobe_instance(kp); |
333 | if (kp->pre_handler(kp, regs)) | 333 | if (kp->pre_handler(kp, regs)) |
334 | return 1; | 334 | return 1; |
@@ -344,7 +344,7 @@ static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, | |||
344 | struct kprobe *kp; | 344 | struct kprobe *kp; |
345 | 345 | ||
346 | list_for_each_entry_rcu(kp, &p->list, list) { | 346 | list_for_each_entry_rcu(kp, &p->list, list) { |
347 | if (kp->post_handler && !kprobe_gone(kp)) { | 347 | if (kp->post_handler && likely(!kprobe_disabled(kp))) { |
348 | set_kprobe_instance(kp); | 348 | set_kprobe_instance(kp); |
349 | kp->post_handler(kp, regs, flags); | 349 | kp->post_handler(kp, regs, flags); |
350 | reset_kprobe_instance(); | 350 | reset_kprobe_instance(); |
@@ -518,20 +518,28 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) | |||
518 | } | 518 | } |
519 | 519 | ||
520 | /* | 520 | /* |
521 | * Add the new probe to old_p->list. Fail if this is the | 521 | * Add the new probe to ap->list. Fail if this is the |
522 | * second jprobe at the address - two jprobes can't coexist | 522 | * second jprobe at the address - two jprobes can't coexist |
523 | */ | 523 | */ |
524 | static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) | 524 | static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) |
525 | { | 525 | { |
526 | BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); | ||
526 | if (p->break_handler) { | 527 | if (p->break_handler) { |
527 | if (old_p->break_handler) | 528 | if (ap->break_handler) |
528 | return -EEXIST; | 529 | return -EEXIST; |
529 | list_add_tail_rcu(&p->list, &old_p->list); | 530 | list_add_tail_rcu(&p->list, &ap->list); |
530 | old_p->break_handler = aggr_break_handler; | 531 | ap->break_handler = aggr_break_handler; |
531 | } else | 532 | } else |
532 | list_add_rcu(&p->list, &old_p->list); | 533 | list_add_rcu(&p->list, &ap->list); |
533 | if (p->post_handler && !old_p->post_handler) | 534 | if (p->post_handler && !ap->post_handler) |
534 | old_p->post_handler = aggr_post_handler; | 535 | ap->post_handler = aggr_post_handler; |
536 | |||
537 | if (kprobe_disabled(ap) && !kprobe_disabled(p)) { | ||
538 | ap->flags &= ~KPROBE_FLAG_DISABLED; | ||
539 | if (!kprobes_all_disarmed) | ||
540 | /* Arm the breakpoint again. */ | ||
541 | arch_arm_kprobe(ap); | ||
542 | } | ||
535 | return 0; | 543 | return 0; |
536 | } | 544 | } |
537 | 545 | ||
@@ -544,6 +552,7 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) | |||
544 | copy_kprobe(p, ap); | 552 | copy_kprobe(p, ap); |
545 | flush_insn_slot(ap); | 553 | flush_insn_slot(ap); |
546 | ap->addr = p->addr; | 554 | ap->addr = p->addr; |
555 | ap->flags = p->flags; | ||
547 | ap->pre_handler = aggr_pre_handler; | 556 | ap->pre_handler = aggr_pre_handler; |
548 | ap->fault_handler = aggr_fault_handler; | 557 | ap->fault_handler = aggr_fault_handler; |
549 | /* We don't care about the kprobe which has gone. */ | 558 | /* We don't care about the kprobe which has gone. */ |
@@ -566,44 +575,59 @@ static int __kprobes register_aggr_kprobe(struct kprobe *old_p, | |||
566 | struct kprobe *p) | 575 | struct kprobe *p) |
567 | { | 576 | { |
568 | int ret = 0; | 577 | int ret = 0; |
569 | struct kprobe *ap; | 578 | struct kprobe *ap = old_p; |
570 | 579 | ||
571 | if (kprobe_gone(old_p)) { | 580 | if (old_p->pre_handler != aggr_pre_handler) { |
581 | /* If old_p is not an aggr_probe, create new aggr_kprobe. */ | ||
582 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); | ||
583 | if (!ap) | ||
584 | return -ENOMEM; | ||
585 | add_aggr_kprobe(ap, old_p); | ||
586 | } | ||
587 | |||
588 | if (kprobe_gone(ap)) { | ||
572 | /* | 589 | /* |
573 | * Attempting to insert new probe at the same location that | 590 | * Attempting to insert new probe at the same location that |
574 | * had a probe in the module vaddr area which was already | 591 | * had a probe in the module vaddr area which was already |
575 | * freed. So, the instruction slot has already been | 592 | * freed. So, the instruction slot has already been |
576 | * released. We need a new slot for the new probe. | 593 | * released. We need a new slot for the new probe. |
577 | */ | 594 | */ |
578 | ret = arch_prepare_kprobe(old_p); | 595 | ret = arch_prepare_kprobe(ap); |
579 | if (ret) | 596 | if (ret) |
597 | /* | ||
598 | * Even if we fail to allocate a new slot, we don't need to | ||
599 | * free aggr_probe. It will be used next time, or | ||
600 | * freed by unregister_kprobe. | ||
601 | */ | ||
580 | return ret; | 602 | return ret; |
581 | } | 603 | |
582 | if (old_p->pre_handler == aggr_pre_handler) { | ||
583 | copy_kprobe(old_p, p); | ||
584 | ret = add_new_kprobe(old_p, p); | ||
585 | ap = old_p; | ||
586 | } else { | ||
587 | ap = kzalloc(sizeof(struct kprobe), GFP_KERNEL); | ||
588 | if (!ap) { | ||
589 | if (kprobe_gone(old_p)) | ||
590 | arch_remove_kprobe(old_p); | ||
591 | return -ENOMEM; | ||
592 | } | ||
593 | add_aggr_kprobe(ap, old_p); | ||
594 | copy_kprobe(ap, p); | ||
595 | ret = add_new_kprobe(ap, p); | ||
596 | } | ||
597 | if (kprobe_gone(old_p)) { | ||
598 | /* | 604 | /* |
599 | * If the old_p has gone, its breakpoint has been disarmed. | 605 | * Clear gone flag to prevent allocating new slot again, and |
600 | * We have to arm it again after preparing real kprobes. | 606 | * set disabled flag because it is not armed yet. |
601 | */ | 607 | */ |
602 | ap->flags &= ~KPROBE_FLAG_GONE; | 608 | ap->flags = (ap->flags & ~KPROBE_FLAG_GONE) |
603 | if (kprobe_enabled) | 609 | | KPROBE_FLAG_DISABLED; |
604 | arch_arm_kprobe(ap); | ||
605 | } | 610 | } |
606 | return ret; | 611 | |
612 | copy_kprobe(ap, p); | ||
613 | return add_new_kprobe(ap, p); | ||
614 | } | ||
615 | |||
616 | /* Try to disable aggr_kprobe, and return 1 on success. */ | ||
617 | static int __kprobes try_to_disable_aggr_kprobe(struct kprobe *p) | ||
618 | { | ||
619 | struct kprobe *kp; | ||
620 | |||
621 | list_for_each_entry_rcu(kp, &p->list, list) { | ||
622 | if (!kprobe_disabled(kp)) | ||
623 | /* | ||
624 | * There is an active probe on the list. | ||
625 | * We can't disable aggr_kprobe. | ||
626 | */ | ||
627 | return 0; | ||
628 | } | ||
629 | p->flags |= KPROBE_FLAG_DISABLED; | ||
630 | return 1; | ||
607 | } | 631 | } |
608 | 632 | ||
609 | static int __kprobes in_kprobes_functions(unsigned long addr) | 633 | static int __kprobes in_kprobes_functions(unsigned long addr) |
@@ -664,7 +688,9 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
664 | return -EINVAL; | 688 | return -EINVAL; |
665 | } | 689 | } |
666 | 690 | ||
667 | p->flags = 0; | 691 | /* User can pass only KPROBE_FLAG_DISABLED to register_kprobe */ |
692 | p->flags &= KPROBE_FLAG_DISABLED; | ||
693 | |||
668 | /* | 694 | /* |
669 | * Check if we are probing a module. | 695 | * Check if we are probing a module. |
670 | */ | 696 | */ |
@@ -709,7 +735,7 @@ int __kprobes register_kprobe(struct kprobe *p) | |||
709 | hlist_add_head_rcu(&p->hlist, | 735 | hlist_add_head_rcu(&p->hlist, |
710 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); | 736 | &kprobe_table[hash_ptr(p->addr, KPROBE_HASH_BITS)]); |
711 | 737 | ||
712 | if (kprobe_enabled) | 738 | if (!kprobes_all_disarmed && !kprobe_disabled(p)) |
713 | arch_arm_kprobe(p); | 739 | arch_arm_kprobe(p); |
714 | 740 | ||
715 | out_unlock_text: | 741 | out_unlock_text: |
@@ -722,26 +748,39 @@ out: | |||
722 | 748 | ||
723 | return ret; | 749 | return ret; |
724 | } | 750 | } |
751 | EXPORT_SYMBOL_GPL(register_kprobe); | ||
725 | 752 | ||
726 | /* | 753 | /* Check passed kprobe is valid and return kprobe in kprobe_table. */ |
727 | * Unregister a kprobe without a scheduler synchronization. | 754 | static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) |
728 | */ | ||
729 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) | ||
730 | { | 755 | { |
731 | struct kprobe *old_p, *list_p; | 756 | struct kprobe *old_p, *list_p; |
732 | 757 | ||
733 | old_p = get_kprobe(p->addr); | 758 | old_p = get_kprobe(p->addr); |
734 | if (unlikely(!old_p)) | 759 | if (unlikely(!old_p)) |
735 | return -EINVAL; | 760 | return NULL; |
736 | 761 | ||
737 | if (p != old_p) { | 762 | if (p != old_p) { |
738 | list_for_each_entry_rcu(list_p, &old_p->list, list) | 763 | list_for_each_entry_rcu(list_p, &old_p->list, list) |
739 | if (list_p == p) | 764 | if (list_p == p) |
740 | /* kprobe p is a valid probe */ | 765 | /* kprobe p is a valid probe */ |
741 | goto valid_p; | 766 | goto valid; |
742 | return -EINVAL; | 767 | return NULL; |
743 | } | 768 | } |
744 | valid_p: | 769 | valid: |
770 | return old_p; | ||
771 | } | ||
772 | |||
773 | /* | ||
774 | * Unregister a kprobe without a scheduler synchronization. | ||
775 | */ | ||
776 | static int __kprobes __unregister_kprobe_top(struct kprobe *p) | ||
777 | { | ||
778 | struct kprobe *old_p, *list_p; | ||
779 | |||
780 | old_p = __get_valid_kprobe(p); | ||
781 | if (old_p == NULL) | ||
782 | return -EINVAL; | ||
783 | |||
745 | if (old_p == p || | 784 | if (old_p == p || |
746 | (old_p->pre_handler == aggr_pre_handler && | 785 | (old_p->pre_handler == aggr_pre_handler && |
747 | list_is_singular(&old_p->list))) { | 786 | list_is_singular(&old_p->list))) { |
@@ -750,7 +789,7 @@ valid_p: | |||
750 | * enabled and not gone - otherwise, the breakpoint would | 789 | * enabled and not gone - otherwise, the breakpoint would |
751 | * already have been removed. We save on flushing icache. | 790 | * already have been removed. We save on flushing icache. |
752 | */ | 791 | */ |
753 | if (kprobe_enabled && !kprobe_gone(old_p)) { | 792 | if (!kprobes_all_disarmed && !kprobe_disabled(old_p)) { |
754 | mutex_lock(&text_mutex); | 793 | mutex_lock(&text_mutex); |
755 | arch_disarm_kprobe(p); | 794 | arch_disarm_kprobe(p); |
756 | mutex_unlock(&text_mutex); | 795 | mutex_unlock(&text_mutex); |
@@ -768,6 +807,11 @@ valid_p: | |||
768 | } | 807 | } |
769 | noclean: | 808 | noclean: |
770 | list_del_rcu(&p->list); | 809 | list_del_rcu(&p->list); |
810 | if (!kprobe_disabled(old_p)) { | ||
811 | try_to_disable_aggr_kprobe(old_p); | ||
812 | if (!kprobes_all_disarmed && kprobe_disabled(old_p)) | ||
813 | arch_disarm_kprobe(old_p); | ||
814 | } | ||
771 | } | 815 | } |
772 | return 0; | 816 | return 0; |
773 | } | 817 | } |
@@ -803,11 +847,13 @@ int __kprobes register_kprobes(struct kprobe **kps, int num) | |||
803 | } | 847 | } |
804 | return ret; | 848 | return ret; |
805 | } | 849 | } |
850 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
806 | 851 | ||
807 | void __kprobes unregister_kprobe(struct kprobe *p) | 852 | void __kprobes unregister_kprobe(struct kprobe *p) |
808 | { | 853 | { |
809 | unregister_kprobes(&p, 1); | 854 | unregister_kprobes(&p, 1); |
810 | } | 855 | } |
856 | EXPORT_SYMBOL_GPL(unregister_kprobe); | ||
811 | 857 | ||
812 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) | 858 | void __kprobes unregister_kprobes(struct kprobe **kps, int num) |
813 | { | 859 | { |
@@ -826,6 +872,7 @@ void __kprobes unregister_kprobes(struct kprobe **kps, int num) | |||
826 | if (kps[i]->addr) | 872 | if (kps[i]->addr) |
827 | __unregister_kprobe_bottom(kps[i]); | 873 | __unregister_kprobe_bottom(kps[i]); |
828 | } | 874 | } |
875 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
829 | 876 | ||
830 | static struct notifier_block kprobe_exceptions_nb = { | 877 | static struct notifier_block kprobe_exceptions_nb = { |
831 | .notifier_call = kprobe_exceptions_notify, | 878 | .notifier_call = kprobe_exceptions_notify, |
@@ -865,16 +912,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) | |||
865 | } | 912 | } |
866 | return ret; | 913 | return ret; |
867 | } | 914 | } |
915 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
868 | 916 | ||
869 | int __kprobes register_jprobe(struct jprobe *jp) | 917 | int __kprobes register_jprobe(struct jprobe *jp) |
870 | { | 918 | { |
871 | return register_jprobes(&jp, 1); | 919 | return register_jprobes(&jp, 1); |
872 | } | 920 | } |
921 | EXPORT_SYMBOL_GPL(register_jprobe); | ||
873 | 922 | ||
874 | void __kprobes unregister_jprobe(struct jprobe *jp) | 923 | void __kprobes unregister_jprobe(struct jprobe *jp) |
875 | { | 924 | { |
876 | unregister_jprobes(&jp, 1); | 925 | unregister_jprobes(&jp, 1); |
877 | } | 926 | } |
927 | EXPORT_SYMBOL_GPL(unregister_jprobe); | ||
878 | 928 | ||
879 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) | 929 | void __kprobes unregister_jprobes(struct jprobe **jps, int num) |
880 | { | 930 | { |
@@ -894,6 +944,7 @@ void __kprobes unregister_jprobes(struct jprobe **jps, int num) | |||
894 | __unregister_kprobe_bottom(&jps[i]->kp); | 944 | __unregister_kprobe_bottom(&jps[i]->kp); |
895 | } | 945 | } |
896 | } | 946 | } |
947 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
897 | 948 | ||
898 | #ifdef CONFIG_KRETPROBES | 949 | #ifdef CONFIG_KRETPROBES |
899 | /* | 950 | /* |
@@ -987,6 +1038,7 @@ int __kprobes register_kretprobe(struct kretprobe *rp) | |||
987 | free_rp_inst(rp); | 1038 | free_rp_inst(rp); |
988 | return ret; | 1039 | return ret; |
989 | } | 1040 | } |
1041 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
990 | 1042 | ||
991 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | 1043 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
992 | { | 1044 | { |
@@ -1004,11 +1056,13 @@ int __kprobes register_kretprobes(struct kretprobe **rps, int num) | |||
1004 | } | 1056 | } |
1005 | return ret; | 1057 | return ret; |
1006 | } | 1058 | } |
1059 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
1007 | 1060 | ||
1008 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1061 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
1009 | { | 1062 | { |
1010 | unregister_kretprobes(&rp, 1); | 1063 | unregister_kretprobes(&rp, 1); |
1011 | } | 1064 | } |
1065 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
1012 | 1066 | ||
1013 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1067 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
1014 | { | 1068 | { |
@@ -1030,24 +1084,30 @@ void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | |||
1030 | } | 1084 | } |
1031 | } | 1085 | } |
1032 | } | 1086 | } |
1087 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
1033 | 1088 | ||
1034 | #else /* CONFIG_KRETPROBES */ | 1089 | #else /* CONFIG_KRETPROBES */ |
1035 | int __kprobes register_kretprobe(struct kretprobe *rp) | 1090 | int __kprobes register_kretprobe(struct kretprobe *rp) |
1036 | { | 1091 | { |
1037 | return -ENOSYS; | 1092 | return -ENOSYS; |
1038 | } | 1093 | } |
1094 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
1039 | 1095 | ||
1040 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) | 1096 | int __kprobes register_kretprobes(struct kretprobe **rps, int num) |
1041 | { | 1097 | { |
1042 | return -ENOSYS; | 1098 | return -ENOSYS; |
1043 | } | 1099 | } |
1100 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
1101 | |||
1044 | void __kprobes unregister_kretprobe(struct kretprobe *rp) | 1102 | void __kprobes unregister_kretprobe(struct kretprobe *rp) |
1045 | { | 1103 | { |
1046 | } | 1104 | } |
1105 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
1047 | 1106 | ||
1048 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) | 1107 | void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) |
1049 | { | 1108 | { |
1050 | } | 1109 | } |
1110 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
1051 | 1111 | ||
1052 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, | 1112 | static int __kprobes pre_handler_kretprobe(struct kprobe *p, |
1053 | struct pt_regs *regs) | 1113 | struct pt_regs *regs) |
@@ -1061,6 +1121,7 @@ static int __kprobes pre_handler_kretprobe(struct kprobe *p, | |||
1061 | static void __kprobes kill_kprobe(struct kprobe *p) | 1121 | static void __kprobes kill_kprobe(struct kprobe *p) |
1062 | { | 1122 | { |
1063 | struct kprobe *kp; | 1123 | struct kprobe *kp; |
1124 | |||
1064 | p->flags |= KPROBE_FLAG_GONE; | 1125 | p->flags |= KPROBE_FLAG_GONE; |
1065 | if (p->pre_handler == aggr_pre_handler) { | 1126 | if (p->pre_handler == aggr_pre_handler) { |
1066 | /* | 1127 | /* |
@@ -1173,8 +1234,8 @@ static int __init init_kprobes(void) | |||
1173 | } | 1234 | } |
1174 | } | 1235 | } |
1175 | 1236 | ||
1176 | /* By default, kprobes are enabled */ | 1237 | /* By default, kprobes are armed */ |
1177 | kprobe_enabled = true; | 1238 | kprobes_all_disarmed = false; |
1178 | 1239 | ||
1179 | err = arch_init_kprobes(); | 1240 | err = arch_init_kprobes(); |
1180 | if (!err) | 1241 | if (!err) |
@@ -1202,12 +1263,18 @@ static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, | |||
1202 | else | 1263 | else |
1203 | kprobe_type = "k"; | 1264 | kprobe_type = "k"; |
1204 | if (sym) | 1265 | if (sym) |
1205 | seq_printf(pi, "%p %s %s+0x%x %s %s\n", p->addr, kprobe_type, | 1266 | seq_printf(pi, "%p %s %s+0x%x %s %s%s\n", |
1206 | sym, offset, (modname ? modname : " "), | 1267 | p->addr, kprobe_type, sym, offset, |
1207 | (kprobe_gone(p) ? "[GONE]" : "")); | 1268 | (modname ? modname : " "), |
1269 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
1270 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
1271 | "[DISABLED]" : "")); | ||
1208 | else | 1272 | else |
1209 | seq_printf(pi, "%p %s %p %s\n", p->addr, kprobe_type, p->addr, | 1273 | seq_printf(pi, "%p %s %p %s%s\n", |
1210 | (kprobe_gone(p) ? "[GONE]" : "")); | 1274 | p->addr, kprobe_type, p->addr, |
1275 | (kprobe_gone(p) ? "[GONE]" : ""), | ||
1276 | ((kprobe_disabled(p) && !kprobe_gone(p)) ? | ||
1277 | "[DISABLED]" : "")); | ||
1211 | } | 1278 | } |
1212 | 1279 | ||
1213 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) | 1280 | static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) |
@@ -1272,7 +1339,72 @@ static struct file_operations debugfs_kprobes_operations = { | |||
1272 | .release = seq_release, | 1339 | .release = seq_release, |
1273 | }; | 1340 | }; |
1274 | 1341 | ||
1275 | static void __kprobes enable_all_kprobes(void) | 1342 | /* Disable one kprobe */ |
1343 | int __kprobes disable_kprobe(struct kprobe *kp) | ||
1344 | { | ||
1345 | int ret = 0; | ||
1346 | struct kprobe *p; | ||
1347 | |||
1348 | mutex_lock(&kprobe_mutex); | ||
1349 | |||
1350 | /* Check whether specified probe is valid. */ | ||
1351 | p = __get_valid_kprobe(kp); | ||
1352 | if (unlikely(p == NULL)) { | ||
1353 | ret = -EINVAL; | ||
1354 | goto out; | ||
1355 | } | ||
1356 | |||
1357 | /* If the probe is already disabled (or gone), just return */ | ||
1358 | if (kprobe_disabled(kp)) | ||
1359 | goto out; | ||
1360 | |||
1361 | kp->flags |= KPROBE_FLAG_DISABLED; | ||
1362 | if (p != kp) | ||
1363 | /* When kp != p, p is always enabled. */ | ||
1364 | try_to_disable_aggr_kprobe(p); | ||
1365 | |||
1366 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
1367 | arch_disarm_kprobe(p); | ||
1368 | out: | ||
1369 | mutex_unlock(&kprobe_mutex); | ||
1370 | return ret; | ||
1371 | } | ||
1372 | EXPORT_SYMBOL_GPL(disable_kprobe); | ||
1373 | |||
1374 | /* Enable one kprobe */ | ||
1375 | int __kprobes enable_kprobe(struct kprobe *kp) | ||
1376 | { | ||
1377 | int ret = 0; | ||
1378 | struct kprobe *p; | ||
1379 | |||
1380 | mutex_lock(&kprobe_mutex); | ||
1381 | |||
1382 | /* Check whether specified probe is valid. */ | ||
1383 | p = __get_valid_kprobe(kp); | ||
1384 | if (unlikely(p == NULL)) { | ||
1385 | ret = -EINVAL; | ||
1386 | goto out; | ||
1387 | } | ||
1388 | |||
1389 | if (kprobe_gone(kp)) { | ||
1390 | /* This kprobe has gone, so we can't enable it. */ | ||
1391 | ret = -EINVAL; | ||
1392 | goto out; | ||
1393 | } | ||
1394 | |||
1395 | if (!kprobes_all_disarmed && kprobe_disabled(p)) | ||
1396 | arch_arm_kprobe(p); | ||
1397 | |||
1398 | p->flags &= ~KPROBE_FLAG_DISABLED; | ||
1399 | if (p != kp) | ||
1400 | kp->flags &= ~KPROBE_FLAG_DISABLED; | ||
1401 | out: | ||
1402 | mutex_unlock(&kprobe_mutex); | ||
1403 | return ret; | ||
1404 | } | ||
1405 | EXPORT_SYMBOL_GPL(enable_kprobe); | ||
1406 | |||
1407 | static void __kprobes arm_all_kprobes(void) | ||
1276 | { | 1408 | { |
1277 | struct hlist_head *head; | 1409 | struct hlist_head *head; |
1278 | struct hlist_node *node; | 1410 | struct hlist_node *node; |
@@ -1281,20 +1413,20 @@ static void __kprobes enable_all_kprobes(void) | |||
1281 | 1413 | ||
1282 | mutex_lock(&kprobe_mutex); | 1414 | mutex_lock(&kprobe_mutex); |
1283 | 1415 | ||
1284 | /* If kprobes are already enabled, just return */ | 1416 | /* If kprobes are armed, just return */ |
1285 | if (kprobe_enabled) | 1417 | if (!kprobes_all_disarmed) |
1286 | goto already_enabled; | 1418 | goto already_enabled; |
1287 | 1419 | ||
1288 | mutex_lock(&text_mutex); | 1420 | mutex_lock(&text_mutex); |
1289 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1421 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1290 | head = &kprobe_table[i]; | 1422 | head = &kprobe_table[i]; |
1291 | hlist_for_each_entry_rcu(p, node, head, hlist) | 1423 | hlist_for_each_entry_rcu(p, node, head, hlist) |
1292 | if (!kprobe_gone(p)) | 1424 | if (!kprobe_disabled(p)) |
1293 | arch_arm_kprobe(p); | 1425 | arch_arm_kprobe(p); |
1294 | } | 1426 | } |
1295 | mutex_unlock(&text_mutex); | 1427 | mutex_unlock(&text_mutex); |
1296 | 1428 | ||
1297 | kprobe_enabled = true; | 1429 | kprobes_all_disarmed = false; |
1298 | printk(KERN_INFO "Kprobes globally enabled\n"); | 1430 | printk(KERN_INFO "Kprobes globally enabled\n"); |
1299 | 1431 | ||
1300 | already_enabled: | 1432 | already_enabled: |
@@ -1302,7 +1434,7 @@ already_enabled: | |||
1302 | return; | 1434 | return; |
1303 | } | 1435 | } |
1304 | 1436 | ||
1305 | static void __kprobes disable_all_kprobes(void) | 1437 | static void __kprobes disarm_all_kprobes(void) |
1306 | { | 1438 | { |
1307 | struct hlist_head *head; | 1439 | struct hlist_head *head; |
1308 | struct hlist_node *node; | 1440 | struct hlist_node *node; |
@@ -1311,17 +1443,17 @@ static void __kprobes disable_all_kprobes(void) | |||
1311 | 1443 | ||
1312 | mutex_lock(&kprobe_mutex); | 1444 | mutex_lock(&kprobe_mutex); |
1313 | 1445 | ||
1314 | /* If kprobes are already disabled, just return */ | 1446 | /* If kprobes are already disarmed, just return */ |
1315 | if (!kprobe_enabled) | 1447 | if (kprobes_all_disarmed) |
1316 | goto already_disabled; | 1448 | goto already_disabled; |
1317 | 1449 | ||
1318 | kprobe_enabled = false; | 1450 | kprobes_all_disarmed = true; |
1319 | printk(KERN_INFO "Kprobes globally disabled\n"); | 1451 | printk(KERN_INFO "Kprobes globally disabled\n"); |
1320 | mutex_lock(&text_mutex); | 1452 | mutex_lock(&text_mutex); |
1321 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { | 1453 | for (i = 0; i < KPROBE_TABLE_SIZE; i++) { |
1322 | head = &kprobe_table[i]; | 1454 | head = &kprobe_table[i]; |
1323 | hlist_for_each_entry_rcu(p, node, head, hlist) { | 1455 | hlist_for_each_entry_rcu(p, node, head, hlist) { |
1324 | if (!arch_trampoline_kprobe(p) && !kprobe_gone(p)) | 1456 | if (!arch_trampoline_kprobe(p) && !kprobe_disabled(p)) |
1325 | arch_disarm_kprobe(p); | 1457 | arch_disarm_kprobe(p); |
1326 | } | 1458 | } |
1327 | } | 1459 | } |
@@ -1347,7 +1479,7 @@ static ssize_t read_enabled_file_bool(struct file *file, | |||
1347 | { | 1479 | { |
1348 | char buf[3]; | 1480 | char buf[3]; |
1349 | 1481 | ||
1350 | if (kprobe_enabled) | 1482 | if (!kprobes_all_disarmed) |
1351 | buf[0] = '1'; | 1483 | buf[0] = '1'; |
1352 | else | 1484 | else |
1353 | buf[0] = '0'; | 1485 | buf[0] = '0'; |
@@ -1370,12 +1502,12 @@ static ssize_t write_enabled_file_bool(struct file *file, | |||
1370 | case 'y': | 1502 | case 'y': |
1371 | case 'Y': | 1503 | case 'Y': |
1372 | case '1': | 1504 | case '1': |
1373 | enable_all_kprobes(); | 1505 | arm_all_kprobes(); |
1374 | break; | 1506 | break; |
1375 | case 'n': | 1507 | case 'n': |
1376 | case 'N': | 1508 | case 'N': |
1377 | case '0': | 1509 | case '0': |
1378 | disable_all_kprobes(); | 1510 | disarm_all_kprobes(); |
1379 | break; | 1511 | break; |
1380 | } | 1512 | } |
1381 | 1513 | ||
@@ -1418,16 +1550,5 @@ late_initcall(debugfs_kprobe_init); | |||
1418 | 1550 | ||
1419 | module_init(init_kprobes); | 1551 | module_init(init_kprobes); |
1420 | 1552 | ||
1421 | EXPORT_SYMBOL_GPL(register_kprobe); | 1553 | /* defined in arch/.../kernel/kprobes.c */ |
1422 | EXPORT_SYMBOL_GPL(unregister_kprobe); | ||
1423 | EXPORT_SYMBOL_GPL(register_kprobes); | ||
1424 | EXPORT_SYMBOL_GPL(unregister_kprobes); | ||
1425 | EXPORT_SYMBOL_GPL(register_jprobe); | ||
1426 | EXPORT_SYMBOL_GPL(unregister_jprobe); | ||
1427 | EXPORT_SYMBOL_GPL(register_jprobes); | ||
1428 | EXPORT_SYMBOL_GPL(unregister_jprobes); | ||
1429 | EXPORT_SYMBOL_GPL(jprobe_return); | 1554 | EXPORT_SYMBOL_GPL(jprobe_return); |
1430 | EXPORT_SYMBOL_GPL(register_kretprobe); | ||
1431 | EXPORT_SYMBOL_GPL(unregister_kretprobe); | ||
1432 | EXPORT_SYMBOL_GPL(register_kretprobes); | ||
1433 | EXPORT_SYMBOL_GPL(unregister_kretprobes); | ||
diff --git a/kernel/kthread.c b/kernel/kthread.c index 84bbadd4d021..4ebaf8519abf 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c | |||
@@ -76,6 +76,7 @@ static int kthread(void *_create) | |||
76 | 76 | ||
77 | /* OK, tell user we're spawned, wait for stop or wakeup */ | 77 | /* OK, tell user we're spawned, wait for stop or wakeup */ |
78 | __set_current_state(TASK_UNINTERRUPTIBLE); | 78 | __set_current_state(TASK_UNINTERRUPTIBLE); |
79 | create->result = current; | ||
79 | complete(&create->started); | 80 | complete(&create->started); |
80 | schedule(); | 81 | schedule(); |
81 | 82 | ||
@@ -96,22 +97,10 @@ static void create_kthread(struct kthread_create_info *create) | |||
96 | 97 | ||
97 | /* We want our own signal handler (we take no signals by default). */ | 98 | /* We want our own signal handler (we take no signals by default). */ |
98 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); | 99 | pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD); |
99 | if (pid < 0) { | 100 | if (pid < 0) |
100 | create->result = ERR_PTR(pid); | 101 | create->result = ERR_PTR(pid); |
101 | } else { | 102 | else |
102 | struct sched_param param = { .sched_priority = 0 }; | ||
103 | wait_for_completion(&create->started); | 103 | wait_for_completion(&create->started); |
104 | read_lock(&tasklist_lock); | ||
105 | create->result = find_task_by_pid_ns(pid, &init_pid_ns); | ||
106 | read_unlock(&tasklist_lock); | ||
107 | /* | ||
108 | * root may have changed our (kthreadd's) priority or CPU mask. | ||
109 | * The kernel thread should not inherit these properties. | ||
110 | */ | ||
111 | sched_setscheduler(create->result, SCHED_NORMAL, &param); | ||
112 | set_user_nice(create->result, KTHREAD_NICE_LEVEL); | ||
113 | set_cpus_allowed_ptr(create->result, cpu_all_mask); | ||
114 | } | ||
115 | complete(&create->done); | 104 | complete(&create->done); |
116 | } | 105 | } |
117 | 106 | ||
@@ -154,11 +143,20 @@ struct task_struct *kthread_create(int (*threadfn)(void *data), | |||
154 | wait_for_completion(&create.done); | 143 | wait_for_completion(&create.done); |
155 | 144 | ||
156 | if (!IS_ERR(create.result)) { | 145 | if (!IS_ERR(create.result)) { |
146 | struct sched_param param = { .sched_priority = 0 }; | ||
157 | va_list args; | 147 | va_list args; |
148 | |||
158 | va_start(args, namefmt); | 149 | va_start(args, namefmt); |
159 | vsnprintf(create.result->comm, sizeof(create.result->comm), | 150 | vsnprintf(create.result->comm, sizeof(create.result->comm), |
160 | namefmt, args); | 151 | namefmt, args); |
161 | va_end(args); | 152 | va_end(args); |
153 | /* | ||
154 | * root may have changed our (kthreadd's) priority or CPU mask. | ||
155 | * The kernel thread should not inherit these properties. | ||
156 | */ | ||
157 | sched_setscheduler_nocheck(create.result, SCHED_NORMAL, &param); | ||
158 | set_user_nice(create.result, KTHREAD_NICE_LEVEL); | ||
159 | set_cpus_allowed_ptr(create.result, cpu_all_mask); | ||
162 | } | 160 | } |
163 | return create.result; | 161 | return create.result; |
164 | } | 162 | } |
diff --git a/kernel/module.c b/kernel/module.c index c268a771595c..05f014efa32c 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -1952,9 +1952,6 @@ static noinline struct module *load_module(void __user *umod, | |||
1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) | 1952 | if (strstarts(secstrings+sechdrs[i].sh_name, ".exit")) |
1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | 1953 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; |
1954 | #endif | 1954 | #endif |
1955 | /* Don't keep __versions around; it's just for loading. */ | ||
1956 | if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) | ||
1957 | sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC; | ||
1958 | } | 1955 | } |
1959 | 1956 | ||
1960 | modindex = find_sec(hdr, sechdrs, secstrings, | 1957 | modindex = find_sec(hdr, sechdrs, secstrings, |
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 8e5d9a68b022..c9dcf98b4463 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c | |||
@@ -18,7 +18,7 @@ void update_rlimit_cpu(unsigned long rlim_new) | |||
18 | 18 | ||
19 | cputime = secs_to_cputime(rlim_new); | 19 | cputime = secs_to_cputime(rlim_new); |
20 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || | 20 | if (cputime_eq(current->signal->it_prof_expires, cputime_zero) || |
21 | cputime_lt(current->signal->it_prof_expires, cputime)) { | 21 | cputime_gt(current->signal->it_prof_expires, cputime)) { |
22 | spin_lock_irq(&current->sighand->siglock); | 22 | spin_lock_irq(&current->sighand->siglock); |
23 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); | 23 | set_process_cpu_timer(current, CPUCLOCK_PROF, &cputime, NULL); |
24 | spin_unlock_irq(&current->sighand->siglock); | 24 | spin_unlock_irq(&current->sighand->siglock); |
@@ -224,7 +224,7 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p, | |||
224 | cpu->cpu = virt_ticks(p); | 224 | cpu->cpu = virt_ticks(p); |
225 | break; | 225 | break; |
226 | case CPUCLOCK_SCHED: | 226 | case CPUCLOCK_SCHED: |
227 | cpu->sched = p->se.sum_exec_runtime + task_delta_exec(p); | 227 | cpu->sched = task_sched_runtime(p); |
228 | break; | 228 | break; |
229 | } | 229 | } |
230 | return 0; | 230 | return 0; |
@@ -305,18 +305,19 @@ static int cpu_clock_sample_group(const clockid_t which_clock, | |||
305 | { | 305 | { |
306 | struct task_cputime cputime; | 306 | struct task_cputime cputime; |
307 | 307 | ||
308 | thread_group_cputime(p, &cputime); | ||
309 | switch (CPUCLOCK_WHICH(which_clock)) { | 308 | switch (CPUCLOCK_WHICH(which_clock)) { |
310 | default: | 309 | default: |
311 | return -EINVAL; | 310 | return -EINVAL; |
312 | case CPUCLOCK_PROF: | 311 | case CPUCLOCK_PROF: |
312 | thread_group_cputime(p, &cputime); | ||
313 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); | 313 | cpu->cpu = cputime_add(cputime.utime, cputime.stime); |
314 | break; | 314 | break; |
315 | case CPUCLOCK_VIRT: | 315 | case CPUCLOCK_VIRT: |
316 | thread_group_cputime(p, &cputime); | ||
316 | cpu->cpu = cputime.utime; | 317 | cpu->cpu = cputime.utime; |
317 | break; | 318 | break; |
318 | case CPUCLOCK_SCHED: | 319 | case CPUCLOCK_SCHED: |
319 | cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p); | 320 | cpu->sched = thread_group_sched_runtime(p); |
320 | break; | 321 | break; |
321 | } | 322 | } |
322 | return 0; | 323 | return 0; |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index aaad0ec34194..64191fa09b7e 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -21,9 +21,7 @@ | |||
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/pid_namespace.h> | 22 | #include <linux/pid_namespace.h> |
23 | #include <linux/syscalls.h> | 23 | #include <linux/syscalls.h> |
24 | 24 | #include <linux/uaccess.h> | |
25 | #include <asm/pgtable.h> | ||
26 | #include <asm/uaccess.h> | ||
27 | 25 | ||
28 | 26 | ||
29 | /* | 27 | /* |
@@ -48,7 +46,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent) | |||
48 | list_add(&child->ptrace_entry, &new_parent->ptraced); | 46 | list_add(&child->ptrace_entry, &new_parent->ptraced); |
49 | child->parent = new_parent; | 47 | child->parent = new_parent; |
50 | } | 48 | } |
51 | 49 | ||
52 | /* | 50 | /* |
53 | * Turn a tracing stop into a normal stop now, since with no tracer there | 51 | * Turn a tracing stop into a normal stop now, since with no tracer there |
54 | * would be no way to wake it up with SIGCONT or SIGKILL. If there was a | 52 | * would be no way to wake it up with SIGCONT or SIGKILL. If there was a |
@@ -173,7 +171,7 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) | |||
173 | task_lock(task); | 171 | task_lock(task); |
174 | err = __ptrace_may_access(task, mode); | 172 | err = __ptrace_may_access(task, mode); |
175 | task_unlock(task); | 173 | task_unlock(task); |
176 | return (!err ? true : false); | 174 | return !err; |
177 | } | 175 | } |
178 | 176 | ||
179 | int ptrace_attach(struct task_struct *task) | 177 | int ptrace_attach(struct task_struct *task) |
@@ -358,7 +356,7 @@ int ptrace_readdata(struct task_struct *tsk, unsigned long src, char __user *dst | |||
358 | copied += retval; | 356 | copied += retval; |
359 | src += retval; | 357 | src += retval; |
360 | dst += retval; | 358 | dst += retval; |
361 | len -= retval; | 359 | len -= retval; |
362 | } | 360 | } |
363 | return copied; | 361 | return copied; |
364 | } | 362 | } |
@@ -383,7 +381,7 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds | |||
383 | copied += retval; | 381 | copied += retval; |
384 | src += retval; | 382 | src += retval; |
385 | dst += retval; | 383 | dst += retval; |
386 | len -= retval; | 384 | len -= retval; |
387 | } | 385 | } |
388 | return copied; | 386 | return copied; |
389 | } | 387 | } |
@@ -496,9 +494,9 @@ static int ptrace_resume(struct task_struct *child, long request, long data) | |||
496 | if (unlikely(!arch_has_single_step())) | 494 | if (unlikely(!arch_has_single_step())) |
497 | return -EIO; | 495 | return -EIO; |
498 | user_enable_single_step(child); | 496 | user_enable_single_step(child); |
499 | } | 497 | } else { |
500 | else | ||
501 | user_disable_single_step(child); | 498 | user_disable_single_step(child); |
499 | } | ||
502 | 500 | ||
503 | child->exit_code = data; | 501 | child->exit_code = data; |
504 | wake_up_process(child); | 502 | wake_up_process(child); |
diff --git a/kernel/sched.c b/kernel/sched.c index 6cc1fd5d5072..5724508c3b66 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1418,10 +1418,22 @@ iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1418 | struct rq_iterator *iterator); | 1418 | struct rq_iterator *iterator); |
1419 | #endif | 1419 | #endif |
1420 | 1420 | ||
1421 | /* Time spent by the tasks of the cpu accounting group executing in ... */ | ||
1422 | enum cpuacct_stat_index { | ||
1423 | CPUACCT_STAT_USER, /* ... user mode */ | ||
1424 | CPUACCT_STAT_SYSTEM, /* ... kernel mode */ | ||
1425 | |||
1426 | CPUACCT_STAT_NSTATS, | ||
1427 | }; | ||
1428 | |||
1421 | #ifdef CONFIG_CGROUP_CPUACCT | 1429 | #ifdef CONFIG_CGROUP_CPUACCT |
1422 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); | 1430 | static void cpuacct_charge(struct task_struct *tsk, u64 cputime); |
1431 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
1432 | enum cpuacct_stat_index idx, cputime_t val); | ||
1423 | #else | 1433 | #else |
1424 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} | 1434 | static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} |
1435 | static inline void cpuacct_update_stats(struct task_struct *tsk, | ||
1436 | enum cpuacct_stat_index idx, cputime_t val) {} | ||
1425 | #endif | 1437 | #endif |
1426 | 1438 | ||
1427 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) | 1439 | static inline void inc_cpu_load(struct rq *rq, unsigned long load) |
@@ -4511,9 +4523,25 @@ DEFINE_PER_CPU(struct kernel_stat, kstat); | |||
4511 | EXPORT_PER_CPU_SYMBOL(kstat); | 4523 | EXPORT_PER_CPU_SYMBOL(kstat); |
4512 | 4524 | ||
4513 | /* | 4525 | /* |
4514 | * Return any ns on the sched_clock that have not yet been banked in | 4526 | * Return any ns on the sched_clock that have not yet been accounted in |
4515 | * @p in case that task is currently running. | 4527 | * @p in case that task is currently running. |
4528 | * | ||
4529 | * Called with task_rq_lock() held on @rq. | ||
4516 | */ | 4530 | */ |
4531 | static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | ||
4532 | { | ||
4533 | u64 ns = 0; | ||
4534 | |||
4535 | if (task_current(rq, p)) { | ||
4536 | update_rq_clock(rq); | ||
4537 | ns = rq->clock - p->se.exec_start; | ||
4538 | if ((s64)ns < 0) | ||
4539 | ns = 0; | ||
4540 | } | ||
4541 | |||
4542 | return ns; | ||
4543 | } | ||
4544 | |||
4517 | unsigned long long task_delta_exec(struct task_struct *p) | 4545 | unsigned long long task_delta_exec(struct task_struct *p) |
4518 | { | 4546 | { |
4519 | unsigned long flags; | 4547 | unsigned long flags; |
@@ -4521,16 +4549,49 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
4521 | u64 ns = 0; | 4549 | u64 ns = 0; |
4522 | 4550 | ||
4523 | rq = task_rq_lock(p, &flags); | 4551 | rq = task_rq_lock(p, &flags); |
4552 | ns = do_task_delta_exec(p, rq); | ||
4553 | task_rq_unlock(rq, &flags); | ||
4524 | 4554 | ||
4525 | if (task_current(rq, p)) { | 4555 | return ns; |
4526 | u64 delta_exec; | 4556 | } |
4527 | 4557 | ||
4528 | update_rq_clock(rq); | 4558 | /* |
4529 | delta_exec = rq->clock - p->se.exec_start; | 4559 | * Return accounted runtime for the task. |
4530 | if ((s64)delta_exec > 0) | 4560 | * In case the task is currently running, return the runtime plus current's |
4531 | ns = delta_exec; | 4561 | * pending runtime that have not been accounted yet. |
4532 | } | 4562 | */ |
4563 | unsigned long long task_sched_runtime(struct task_struct *p) | ||
4564 | { | ||
4565 | unsigned long flags; | ||
4566 | struct rq *rq; | ||
4567 | u64 ns = 0; | ||
4568 | |||
4569 | rq = task_rq_lock(p, &flags); | ||
4570 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4571 | task_rq_unlock(rq, &flags); | ||
4572 | |||
4573 | return ns; | ||
4574 | } | ||
4575 | |||
4576 | /* | ||
4577 | * Return sum_exec_runtime for the thread group. | ||
4578 | * In case the task is currently running, return the sum plus current's | ||
4579 | * pending runtime that have not been accounted yet. | ||
4580 | * | ||
4581 | * Note that the thread group might have other running tasks as well, | ||
4582 | * so the return value not includes other pending runtime that other | ||
4583 | * running tasks might have. | ||
4584 | */ | ||
4585 | unsigned long long thread_group_sched_runtime(struct task_struct *p) | ||
4586 | { | ||
4587 | struct task_cputime totals; | ||
4588 | unsigned long flags; | ||
4589 | struct rq *rq; | ||
4590 | u64 ns; | ||
4533 | 4591 | ||
4592 | rq = task_rq_lock(p, &flags); | ||
4593 | thread_group_cputime(p, &totals); | ||
4594 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | ||
4534 | task_rq_unlock(rq, &flags); | 4595 | task_rq_unlock(rq, &flags); |
4535 | 4596 | ||
4536 | return ns; | 4597 | return ns; |
@@ -4559,6 +4620,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime, | |||
4559 | cpustat->nice = cputime64_add(cpustat->nice, tmp); | 4620 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
4560 | else | 4621 | else |
4561 | cpustat->user = cputime64_add(cpustat->user, tmp); | 4622 | cpustat->user = cputime64_add(cpustat->user, tmp); |
4623 | |||
4624 | cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime); | ||
4562 | /* Account for user time used */ | 4625 | /* Account for user time used */ |
4563 | acct_update_integrals(p); | 4626 | acct_update_integrals(p); |
4564 | } | 4627 | } |
@@ -4620,6 +4683,8 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
4620 | else | 4683 | else |
4621 | cpustat->system = cputime64_add(cpustat->system, tmp); | 4684 | cpustat->system = cputime64_add(cpustat->system, tmp); |
4622 | 4685 | ||
4686 | cpuacct_update_stats(p, CPUACCT_STAT_SYSTEM, cputime); | ||
4687 | |||
4623 | /* Account for system time used */ | 4688 | /* Account for system time used */ |
4624 | acct_update_integrals(p); | 4689 | acct_update_integrals(p); |
4625 | } | 4690 | } |
@@ -7302,7 +7367,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
7302 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); | 7367 | cpumask_or(groupmask, groupmask, sched_group_cpus(group)); |
7303 | 7368 | ||
7304 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 7369 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
7305 | printk(KERN_CONT " %s", str); | 7370 | printk(KERN_CONT " %s (__cpu_power = %d)", str, |
7371 | group->__cpu_power); | ||
7306 | 7372 | ||
7307 | group = group->next; | 7373 | group = group->next; |
7308 | } while (group != sd->groups); | 7374 | } while (group != sd->groups); |
@@ -9925,6 +9991,7 @@ struct cpuacct { | |||
9925 | struct cgroup_subsys_state css; | 9991 | struct cgroup_subsys_state css; |
9926 | /* cpuusage holds pointer to a u64-type object on every cpu */ | 9992 | /* cpuusage holds pointer to a u64-type object on every cpu */ |
9927 | u64 *cpuusage; | 9993 | u64 *cpuusage; |
9994 | struct percpu_counter cpustat[CPUACCT_STAT_NSTATS]; | ||
9928 | struct cpuacct *parent; | 9995 | struct cpuacct *parent; |
9929 | }; | 9996 | }; |
9930 | 9997 | ||
@@ -9949,20 +10016,32 @@ static struct cgroup_subsys_state *cpuacct_create( | |||
9949 | struct cgroup_subsys *ss, struct cgroup *cgrp) | 10016 | struct cgroup_subsys *ss, struct cgroup *cgrp) |
9950 | { | 10017 | { |
9951 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); | 10018 | struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL); |
10019 | int i; | ||
9952 | 10020 | ||
9953 | if (!ca) | 10021 | if (!ca) |
9954 | return ERR_PTR(-ENOMEM); | 10022 | goto out; |
9955 | 10023 | ||
9956 | ca->cpuusage = alloc_percpu(u64); | 10024 | ca->cpuusage = alloc_percpu(u64); |
9957 | if (!ca->cpuusage) { | 10025 | if (!ca->cpuusage) |
9958 | kfree(ca); | 10026 | goto out_free_ca; |
9959 | return ERR_PTR(-ENOMEM); | 10027 | |
9960 | } | 10028 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
10029 | if (percpu_counter_init(&ca->cpustat[i], 0)) | ||
10030 | goto out_free_counters; | ||
9961 | 10031 | ||
9962 | if (cgrp->parent) | 10032 | if (cgrp->parent) |
9963 | ca->parent = cgroup_ca(cgrp->parent); | 10033 | ca->parent = cgroup_ca(cgrp->parent); |
9964 | 10034 | ||
9965 | return &ca->css; | 10035 | return &ca->css; |
10036 | |||
10037 | out_free_counters: | ||
10038 | while (--i >= 0) | ||
10039 | percpu_counter_destroy(&ca->cpustat[i]); | ||
10040 | free_percpu(ca->cpuusage); | ||
10041 | out_free_ca: | ||
10042 | kfree(ca); | ||
10043 | out: | ||
10044 | return ERR_PTR(-ENOMEM); | ||
9966 | } | 10045 | } |
9967 | 10046 | ||
9968 | /* destroy an existing cpu accounting group */ | 10047 | /* destroy an existing cpu accounting group */ |
@@ -9970,7 +10049,10 @@ static void | |||
9970 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10049 | cpuacct_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) |
9971 | { | 10050 | { |
9972 | struct cpuacct *ca = cgroup_ca(cgrp); | 10051 | struct cpuacct *ca = cgroup_ca(cgrp); |
10052 | int i; | ||
9973 | 10053 | ||
10054 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) | ||
10055 | percpu_counter_destroy(&ca->cpustat[i]); | ||
9974 | free_percpu(ca->cpuusage); | 10056 | free_percpu(ca->cpuusage); |
9975 | kfree(ca); | 10057 | kfree(ca); |
9976 | } | 10058 | } |
@@ -10057,6 +10139,25 @@ static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |||
10057 | return 0; | 10139 | return 0; |
10058 | } | 10140 | } |
10059 | 10141 | ||
10142 | static const char *cpuacct_stat_desc[] = { | ||
10143 | [CPUACCT_STAT_USER] = "user", | ||
10144 | [CPUACCT_STAT_SYSTEM] = "system", | ||
10145 | }; | ||
10146 | |||
10147 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | ||
10148 | struct cgroup_map_cb *cb) | ||
10149 | { | ||
10150 | struct cpuacct *ca = cgroup_ca(cgrp); | ||
10151 | int i; | ||
10152 | |||
10153 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) { | ||
10154 | s64 val = percpu_counter_read(&ca->cpustat[i]); | ||
10155 | val = cputime64_to_clock_t(val); | ||
10156 | cb->fill(cb, cpuacct_stat_desc[i], val); | ||
10157 | } | ||
10158 | return 0; | ||
10159 | } | ||
10160 | |||
10060 | static struct cftype files[] = { | 10161 | static struct cftype files[] = { |
10061 | { | 10162 | { |
10062 | .name = "usage", | 10163 | .name = "usage", |
@@ -10067,7 +10168,10 @@ static struct cftype files[] = { | |||
10067 | .name = "usage_percpu", | 10168 | .name = "usage_percpu", |
10068 | .read_seq_string = cpuacct_percpu_seq_read, | 10169 | .read_seq_string = cpuacct_percpu_seq_read, |
10069 | }, | 10170 | }, |
10070 | 10171 | { | |
10172 | .name = "stat", | ||
10173 | .read_map = cpuacct_stats_show, | ||
10174 | }, | ||
10071 | }; | 10175 | }; |
10072 | 10176 | ||
10073 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | 10177 | static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) |
@@ -10089,12 +10193,38 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |||
10089 | return; | 10193 | return; |
10090 | 10194 | ||
10091 | cpu = task_cpu(tsk); | 10195 | cpu = task_cpu(tsk); |
10196 | |||
10197 | rcu_read_lock(); | ||
10198 | |||
10092 | ca = task_ca(tsk); | 10199 | ca = task_ca(tsk); |
10093 | 10200 | ||
10094 | for (; ca; ca = ca->parent) { | 10201 | for (; ca; ca = ca->parent) { |
10095 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | 10202 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
10096 | *cpuusage += cputime; | 10203 | *cpuusage += cputime; |
10097 | } | 10204 | } |
10205 | |||
10206 | rcu_read_unlock(); | ||
10207 | } | ||
10208 | |||
10209 | /* | ||
10210 | * Charge the system/user time to the task's accounting group. | ||
10211 | */ | ||
10212 | static void cpuacct_update_stats(struct task_struct *tsk, | ||
10213 | enum cpuacct_stat_index idx, cputime_t val) | ||
10214 | { | ||
10215 | struct cpuacct *ca; | ||
10216 | |||
10217 | if (unlikely(!cpuacct_subsys.active)) | ||
10218 | return; | ||
10219 | |||
10220 | rcu_read_lock(); | ||
10221 | ca = task_ca(tsk); | ||
10222 | |||
10223 | do { | ||
10224 | percpu_counter_add(&ca->cpustat[idx], val); | ||
10225 | ca = ca->parent; | ||
10226 | } while (ca); | ||
10227 | rcu_read_unlock(); | ||
10098 | } | 10228 | } |
10099 | 10229 | ||
10100 | struct cgroup_subsys cpuacct_subsys = { | 10230 | struct cgroup_subsys cpuacct_subsys = { |
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 1e00bfacf9b8..cdd3c89574cd 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
@@ -55,7 +55,7 @@ static int convert_prio(int prio) | |||
55 | * cpupri_find - find the best (lowest-pri) CPU in the system | 55 | * cpupri_find - find the best (lowest-pri) CPU in the system |
56 | * @cp: The cpupri context | 56 | * @cp: The cpupri context |
57 | * @p: The task | 57 | * @p: The task |
58 | * @lowest_mask: A mask to fill in with selected CPUs | 58 | * @lowest_mask: A mask to fill in with selected CPUs (or NULL) |
59 | * | 59 | * |
60 | * Note: This function returns the recommended CPUs as calculated during the | 60 | * Note: This function returns the recommended CPUs as calculated during the |
61 | * current invokation. By the time the call returns, the CPUs may have in | 61 | * current invokation. By the time the call returns, the CPUs may have in |
@@ -81,7 +81,8 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | 81 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
82 | continue; | 82 | continue; |
83 | 83 | ||
84 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | 84 | if (lowest_mask) |
85 | cpumask_and(lowest_mask, &p->cpus_allowed, vec->mask); | ||
85 | return 1; | 86 | return 1; |
86 | } | 87 | } |
87 | 88 | ||
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 299d012b4394..f2c66f8f9712 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -948,20 +948,15 @@ static int select_task_rq_rt(struct task_struct *p, int sync) | |||
948 | 948 | ||
949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 949 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
950 | { | 950 | { |
951 | cpumask_var_t mask; | ||
952 | |||
953 | if (rq->curr->rt.nr_cpus_allowed == 1) | 951 | if (rq->curr->rt.nr_cpus_allowed == 1) |
954 | return; | 952 | return; |
955 | 953 | ||
956 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
957 | return; | ||
958 | |||
959 | if (p->rt.nr_cpus_allowed != 1 | 954 | if (p->rt.nr_cpus_allowed != 1 |
960 | && cpupri_find(&rq->rd->cpupri, p, mask)) | 955 | && cpupri_find(&rq->rd->cpupri, p, NULL)) |
961 | goto free; | 956 | return; |
962 | 957 | ||
963 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, mask)) | 958 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) |
964 | goto free; | 959 | return; |
965 | 960 | ||
966 | /* | 961 | /* |
967 | * There appears to be other cpus that can accept | 962 | * There appears to be other cpus that can accept |
@@ -970,8 +965,6 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
970 | */ | 965 | */ |
971 | requeue_task_rt(rq, p, 1); | 966 | requeue_task_rt(rq, p, 1); |
972 | resched_task(rq->curr); | 967 | resched_task(rq->curr); |
973 | free: | ||
974 | free_cpumask_var(mask); | ||
975 | } | 968 | } |
976 | 969 | ||
977 | #endif /* CONFIG_SMP */ | 970 | #endif /* CONFIG_SMP */ |
diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 85d5a2455103..88796c330838 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c | |||
@@ -166,97 +166,11 @@ void softlockup_tick(void) | |||
166 | } | 166 | } |
167 | 167 | ||
168 | /* | 168 | /* |
169 | * Have a reasonable limit on the number of tasks checked: | ||
170 | */ | ||
171 | unsigned long __read_mostly sysctl_hung_task_check_count = 1024; | ||
172 | |||
173 | /* | ||
174 | * Zero means infinite timeout - no checking done: | ||
175 | */ | ||
176 | unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; | ||
177 | |||
178 | unsigned long __read_mostly sysctl_hung_task_warnings = 10; | ||
179 | |||
180 | /* | ||
181 | * Only do the hung-tasks check on one CPU: | ||
182 | */ | ||
183 | static int check_cpu __read_mostly = -1; | ||
184 | |||
185 | static void check_hung_task(struct task_struct *t, unsigned long now) | ||
186 | { | ||
187 | unsigned long switch_count = t->nvcsw + t->nivcsw; | ||
188 | |||
189 | if (t->flags & PF_FROZEN) | ||
190 | return; | ||
191 | |||
192 | if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { | ||
193 | t->last_switch_count = switch_count; | ||
194 | t->last_switch_timestamp = now; | ||
195 | return; | ||
196 | } | ||
197 | if ((long)(now - t->last_switch_timestamp) < | ||
198 | sysctl_hung_task_timeout_secs) | ||
199 | return; | ||
200 | if (!sysctl_hung_task_warnings) | ||
201 | return; | ||
202 | sysctl_hung_task_warnings--; | ||
203 | |||
204 | /* | ||
205 | * Ok, the task did not get scheduled for more than 2 minutes, | ||
206 | * complain: | ||
207 | */ | ||
208 | printk(KERN_ERR "INFO: task %s:%d blocked for more than " | ||
209 | "%ld seconds.\n", t->comm, t->pid, | ||
210 | sysctl_hung_task_timeout_secs); | ||
211 | printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" | ||
212 | " disables this message.\n"); | ||
213 | sched_show_task(t); | ||
214 | __debug_show_held_locks(t); | ||
215 | |||
216 | t->last_switch_timestamp = now; | ||
217 | touch_nmi_watchdog(); | ||
218 | |||
219 | if (softlockup_panic) | ||
220 | panic("softlockup: blocked tasks"); | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for | ||
225 | * a really long time (120 seconds). If that happens, print out | ||
226 | * a warning. | ||
227 | */ | ||
228 | static void check_hung_uninterruptible_tasks(int this_cpu) | ||
229 | { | ||
230 | int max_count = sysctl_hung_task_check_count; | ||
231 | unsigned long now = get_timestamp(this_cpu); | ||
232 | struct task_struct *g, *t; | ||
233 | |||
234 | /* | ||
235 | * If the system crashed already then all bets are off, | ||
236 | * do not report extra hung tasks: | ||
237 | */ | ||
238 | if (test_taint(TAINT_DIE) || did_panic) | ||
239 | return; | ||
240 | |||
241 | read_lock(&tasklist_lock); | ||
242 | do_each_thread(g, t) { | ||
243 | if (!--max_count) | ||
244 | goto unlock; | ||
245 | /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ | ||
246 | if (t->state == TASK_UNINTERRUPTIBLE) | ||
247 | check_hung_task(t, now); | ||
248 | } while_each_thread(g, t); | ||
249 | unlock: | ||
250 | read_unlock(&tasklist_lock); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * The watchdog thread - runs every second and touches the timestamp. | 169 | * The watchdog thread - runs every second and touches the timestamp. |
255 | */ | 170 | */ |
256 | static int watchdog(void *__bind_cpu) | 171 | static int watchdog(void *__bind_cpu) |
257 | { | 172 | { |
258 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; | 173 | struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; |
259 | int this_cpu = (long)__bind_cpu; | ||
260 | 174 | ||
261 | sched_setscheduler(current, SCHED_FIFO, &param); | 175 | sched_setscheduler(current, SCHED_FIFO, &param); |
262 | 176 | ||
@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) | |||
276 | if (kthread_should_stop()) | 190 | if (kthread_should_stop()) |
277 | break; | 191 | break; |
278 | 192 | ||
279 | if (this_cpu == check_cpu) { | ||
280 | if (sysctl_hung_task_timeout_secs) | ||
281 | check_hung_uninterruptible_tasks(this_cpu); | ||
282 | } | ||
283 | |||
284 | set_current_state(TASK_INTERRUPTIBLE); | 193 | set_current_state(TASK_INTERRUPTIBLE); |
285 | } | 194 | } |
286 | __set_current_state(TASK_RUNNING); | 195 | __set_current_state(TASK_RUNNING); |
@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
312 | break; | 221 | break; |
313 | case CPU_ONLINE: | 222 | case CPU_ONLINE: |
314 | case CPU_ONLINE_FROZEN: | 223 | case CPU_ONLINE_FROZEN: |
315 | check_cpu = cpumask_any(cpu_online_mask); | ||
316 | wake_up_process(per_cpu(watchdog_task, hotcpu)); | 224 | wake_up_process(per_cpu(watchdog_task, hotcpu)); |
317 | break; | 225 | break; |
318 | #ifdef CONFIG_HOTPLUG_CPU | 226 | #ifdef CONFIG_HOTPLUG_CPU |
319 | case CPU_DOWN_PREPARE: | ||
320 | case CPU_DOWN_PREPARE_FROZEN: | ||
321 | if (hotcpu == check_cpu) { | ||
322 | /* Pick any other online cpu. */ | ||
323 | check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); | ||
324 | } | ||
325 | break; | ||
326 | |||
327 | case CPU_UP_CANCELED: | 227 | case CPU_UP_CANCELED: |
328 | case CPU_UP_CANCELED_FROZEN: | 228 | case CPU_UP_CANCELED_FROZEN: |
329 | if (!per_cpu(watchdog_task, hotcpu)) | 229 | if (!per_cpu(watchdog_task, hotcpu)) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b125e3387568..4286b62b34a0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -101,6 +101,7 @@ static int __maybe_unused one = 1; | |||
101 | static int __maybe_unused two = 2; | 101 | static int __maybe_unused two = 2; |
102 | static unsigned long one_ul = 1; | 102 | static unsigned long one_ul = 1; |
103 | static int one_hundred = 100; | 103 | static int one_hundred = 100; |
104 | static int one_thousand = 1000; | ||
104 | 105 | ||
105 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ | 106 | /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ |
106 | static int maxolduid = 65535; | 107 | static int maxolduid = 65535; |
@@ -813,6 +814,19 @@ static struct ctl_table kern_table[] = { | |||
813 | .extra1 = &neg_one, | 814 | .extra1 = &neg_one, |
814 | .extra2 = &sixty, | 815 | .extra2 = &sixty, |
815 | }, | 816 | }, |
817 | #endif | ||
818 | #ifdef CONFIG_DETECT_HUNG_TASK | ||
819 | { | ||
820 | .ctl_name = CTL_UNNUMBERED, | ||
821 | .procname = "hung_task_panic", | ||
822 | .data = &sysctl_hung_task_panic, | ||
823 | .maxlen = sizeof(int), | ||
824 | .mode = 0644, | ||
825 | .proc_handler = &proc_dointvec_minmax, | ||
826 | .strategy = &sysctl_intvec, | ||
827 | .extra1 = &zero, | ||
828 | .extra2 = &one, | ||
829 | }, | ||
816 | { | 830 | { |
817 | .ctl_name = CTL_UNNUMBERED, | 831 | .ctl_name = CTL_UNNUMBERED, |
818 | .procname = "hung_task_check_count", | 832 | .procname = "hung_task_check_count", |
@@ -828,7 +842,7 @@ static struct ctl_table kern_table[] = { | |||
828 | .data = &sysctl_hung_task_timeout_secs, | 842 | .data = &sysctl_hung_task_timeout_secs, |
829 | .maxlen = sizeof(unsigned long), | 843 | .maxlen = sizeof(unsigned long), |
830 | .mode = 0644, | 844 | .mode = 0644, |
831 | .proc_handler = &proc_doulongvec_minmax, | 845 | .proc_handler = &proc_dohung_task_timeout_secs, |
832 | .strategy = &sysctl_intvec, | 846 | .strategy = &sysctl_intvec, |
833 | }, | 847 | }, |
834 | { | 848 | { |
@@ -1027,6 +1041,28 @@ static struct ctl_table vm_table[] = { | |||
1027 | .proc_handler = &proc_dointvec, | 1041 | .proc_handler = &proc_dointvec, |
1028 | }, | 1042 | }, |
1029 | { | 1043 | { |
1044 | .ctl_name = CTL_UNNUMBERED, | ||
1045 | .procname = "nr_pdflush_threads_min", | ||
1046 | .data = &nr_pdflush_threads_min, | ||
1047 | .maxlen = sizeof nr_pdflush_threads_min, | ||
1048 | .mode = 0644 /* read-write */, | ||
1049 | .proc_handler = &proc_dointvec_minmax, | ||
1050 | .strategy = &sysctl_intvec, | ||
1051 | .extra1 = &one, | ||
1052 | .extra2 = &nr_pdflush_threads_max, | ||
1053 | }, | ||
1054 | { | ||
1055 | .ctl_name = CTL_UNNUMBERED, | ||
1056 | .procname = "nr_pdflush_threads_max", | ||
1057 | .data = &nr_pdflush_threads_max, | ||
1058 | .maxlen = sizeof nr_pdflush_threads_max, | ||
1059 | .mode = 0644 /* read-write */, | ||
1060 | .proc_handler = &proc_dointvec_minmax, | ||
1061 | .strategy = &sysctl_intvec, | ||
1062 | .extra1 = &nr_pdflush_threads_min, | ||
1063 | .extra2 = &one_thousand, | ||
1064 | }, | ||
1065 | { | ||
1030 | .ctl_name = VM_SWAPPINESS, | 1066 | .ctl_name = VM_SWAPPINESS, |
1031 | .procname = "swappiness", | 1067 | .procname = "swappiness", |
1032 | .data = &vm_swappiness, | 1068 | .data = &vm_swappiness, |
diff --git a/kernel/timer.c b/kernel/timer.c index b4555568b4e4..cffffad01c31 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -531,10 +531,13 @@ static void __init_timer(struct timer_list *timer, | |||
531 | } | 531 | } |
532 | 532 | ||
533 | /** | 533 | /** |
534 | * init_timer - initialize a timer. | 534 | * init_timer_key - initialize a timer |
535 | * @timer: the timer to be initialized | 535 | * @timer: the timer to be initialized |
536 | * @name: name of the timer | ||
537 | * @key: lockdep class key of the fake lock used for tracking timer | ||
538 | * sync lock dependencies | ||
536 | * | 539 | * |
537 | * init_timer() must be done to a timer prior calling *any* of the | 540 | * init_timer_key() must be done to a timer prior calling *any* of the |
538 | * other timer functions. | 541 | * other timer functions. |
539 | */ | 542 | */ |
540 | void init_timer_key(struct timer_list *timer, | 543 | void init_timer_key(struct timer_list *timer, |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index b32ff446c3fb..921ef5d1f0ba 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -1377,12 +1377,12 @@ static int blk_trace_str2mask(const char *str) | |||
1377 | { | 1377 | { |
1378 | int i; | 1378 | int i; |
1379 | int mask = 0; | 1379 | int mask = 0; |
1380 | char *s, *token; | 1380 | char *buf, *s, *token; |
1381 | 1381 | ||
1382 | s = kstrdup(str, GFP_KERNEL); | 1382 | buf = kstrdup(str, GFP_KERNEL); |
1383 | if (s == NULL) | 1383 | if (buf == NULL) |
1384 | return -ENOMEM; | 1384 | return -ENOMEM; |
1385 | s = strstrip(s); | 1385 | s = strstrip(buf); |
1386 | 1386 | ||
1387 | while (1) { | 1387 | while (1) { |
1388 | token = strsep(&s, ","); | 1388 | token = strsep(&s, ","); |
@@ -1403,7 +1403,7 @@ static int blk_trace_str2mask(const char *str) | |||
1403 | break; | 1403 | break; |
1404 | } | 1404 | } |
1405 | } | 1405 | } |
1406 | kfree(s); | 1406 | kfree(buf); |
1407 | 1407 | ||
1408 | return mask; | 1408 | return mask; |
1409 | } | 1409 | } |
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0615751a3ed7..4865459f609f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
@@ -3268,19 +3268,13 @@ static int tracing_buffers_open(struct inode *inode, struct file *filp) | |||
3268 | 3268 | ||
3269 | info->tr = &global_trace; | 3269 | info->tr = &global_trace; |
3270 | info->cpu = cpu; | 3270 | info->cpu = cpu; |
3271 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | 3271 | info->spare = NULL; |
3272 | /* Force reading ring buffer for first read */ | 3272 | /* Force reading ring buffer for first read */ |
3273 | info->read = (unsigned int)-1; | 3273 | info->read = (unsigned int)-1; |
3274 | if (!info->spare) | ||
3275 | goto out; | ||
3276 | 3274 | ||
3277 | filp->private_data = info; | 3275 | filp->private_data = info; |
3278 | 3276 | ||
3279 | return 0; | 3277 | return nonseekable_open(inode, filp); |
3280 | |||
3281 | out: | ||
3282 | kfree(info); | ||
3283 | return -ENOMEM; | ||
3284 | } | 3278 | } |
3285 | 3279 | ||
3286 | static ssize_t | 3280 | static ssize_t |
@@ -3295,6 +3289,11 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, | |||
3295 | if (!count) | 3289 | if (!count) |
3296 | return 0; | 3290 | return 0; |
3297 | 3291 | ||
3292 | if (!info->spare) | ||
3293 | info->spare = ring_buffer_alloc_read_page(info->tr->buffer); | ||
3294 | if (!info->spare) | ||
3295 | return -ENOMEM; | ||
3296 | |||
3298 | /* Do we have previous read data to read? */ | 3297 | /* Do we have previous read data to read? */ |
3299 | if (info->read < PAGE_SIZE) | 3298 | if (info->read < PAGE_SIZE) |
3300 | goto read; | 3299 | goto read; |
@@ -3333,7 +3332,8 @@ static int tracing_buffers_release(struct inode *inode, struct file *file) | |||
3333 | { | 3332 | { |
3334 | struct ftrace_buffer_info *info = file->private_data; | 3333 | struct ftrace_buffer_info *info = file->private_data; |
3335 | 3334 | ||
3336 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | 3335 | if (info->spare) |
3336 | ring_buffer_free_read_page(info->tr->buffer, info->spare); | ||
3337 | kfree(info); | 3337 | kfree(info); |
3338 | 3338 | ||
3339 | return 0; | 3339 | return 0; |
@@ -3419,14 +3419,19 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3419 | int size, i; | 3419 | int size, i; |
3420 | size_t ret; | 3420 | size_t ret; |
3421 | 3421 | ||
3422 | /* | 3422 | if (*ppos & (PAGE_SIZE - 1)) { |
3423 | * We can't seek on a buffer input | 3423 | WARN_ONCE(1, "Ftrace: previous read must page-align\n"); |
3424 | */ | 3424 | return -EINVAL; |
3425 | if (unlikely(*ppos)) | 3425 | } |
3426 | return -ESPIPE; | ||
3427 | 3426 | ||
3427 | if (len & (PAGE_SIZE - 1)) { | ||
3428 | WARN_ONCE(1, "Ftrace: splice_read should page-align\n"); | ||
3429 | if (len < PAGE_SIZE) | ||
3430 | return -EINVAL; | ||
3431 | len &= PAGE_MASK; | ||
3432 | } | ||
3428 | 3433 | ||
3429 | for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { | 3434 | for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) { |
3430 | struct page *page; | 3435 | struct page *page; |
3431 | int r; | 3436 | int r; |
3432 | 3437 | ||
@@ -3465,6 +3470,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, | |||
3465 | spd.partial[i].offset = 0; | 3470 | spd.partial[i].offset = 0; |
3466 | spd.partial[i].private = (unsigned long)ref; | 3471 | spd.partial[i].private = (unsigned long)ref; |
3467 | spd.nr_pages++; | 3472 | spd.nr_pages++; |
3473 | *ppos += PAGE_SIZE; | ||
3468 | } | 3474 | } |
3469 | 3475 | ||
3470 | spd.nr_pages = i; | 3476 | spd.nr_pages = i; |
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c index a2a3af29c943..5e579645ac86 100644 --- a/kernel/trace/trace_syscalls.c +++ b/kernel/trace/trace_syscalls.c | |||
@@ -1,5 +1,5 @@ | |||
1 | #include <trace/syscall.h> | ||
1 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
2 | #include <linux/ftrace.h> | ||
3 | #include <asm/syscall.h> | 3 | #include <asm/syscall.h> |
4 | 4 | ||
5 | #include "trace_output.h" | 5 | #include "trace_output.h" |
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b6b966ce1451..f71fb2a08950 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -966,20 +966,20 @@ undo: | |||
966 | } | 966 | } |
967 | 967 | ||
968 | #ifdef CONFIG_SMP | 968 | #ifdef CONFIG_SMP |
969 | static struct workqueue_struct *work_on_cpu_wq __read_mostly; | ||
970 | 969 | ||
971 | struct work_for_cpu { | 970 | struct work_for_cpu { |
972 | struct work_struct work; | 971 | struct completion completion; |
973 | long (*fn)(void *); | 972 | long (*fn)(void *); |
974 | void *arg; | 973 | void *arg; |
975 | long ret; | 974 | long ret; |
976 | }; | 975 | }; |
977 | 976 | ||
978 | static void do_work_for_cpu(struct work_struct *w) | 977 | static int do_work_for_cpu(void *_wfc) |
979 | { | 978 | { |
980 | struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); | 979 | struct work_for_cpu *wfc = _wfc; |
981 | |||
982 | wfc->ret = wfc->fn(wfc->arg); | 980 | wfc->ret = wfc->fn(wfc->arg); |
981 | complete(&wfc->completion); | ||
982 | return 0; | ||
983 | } | 983 | } |
984 | 984 | ||
985 | /** | 985 | /** |
@@ -990,17 +990,23 @@ static void do_work_for_cpu(struct work_struct *w) | |||
990 | * | 990 | * |
991 | * This will return the value @fn returns. | 991 | * This will return the value @fn returns. |
992 | * It is up to the caller to ensure that the cpu doesn't go offline. | 992 | * It is up to the caller to ensure that the cpu doesn't go offline. |
993 | * The caller must not hold any locks which would prevent @fn from completing. | ||
993 | */ | 994 | */ |
994 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) | 995 | long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) |
995 | { | 996 | { |
996 | struct work_for_cpu wfc; | 997 | struct task_struct *sub_thread; |
997 | 998 | struct work_for_cpu wfc = { | |
998 | INIT_WORK(&wfc.work, do_work_for_cpu); | 999 | .completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion), |
999 | wfc.fn = fn; | 1000 | .fn = fn, |
1000 | wfc.arg = arg; | 1001 | .arg = arg, |
1001 | queue_work_on(cpu, work_on_cpu_wq, &wfc.work); | 1002 | }; |
1002 | flush_work(&wfc.work); | 1003 | |
1003 | 1004 | sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu"); | |
1005 | if (IS_ERR(sub_thread)) | ||
1006 | return PTR_ERR(sub_thread); | ||
1007 | kthread_bind(sub_thread, cpu); | ||
1008 | wake_up_process(sub_thread); | ||
1009 | wait_for_completion(&wfc.completion); | ||
1004 | return wfc.ret; | 1010 | return wfc.ret; |
1005 | } | 1011 | } |
1006 | EXPORT_SYMBOL_GPL(work_on_cpu); | 1012 | EXPORT_SYMBOL_GPL(work_on_cpu); |
@@ -1016,8 +1022,4 @@ void __init init_workqueues(void) | |||
1016 | hotcpu_notifier(workqueue_cpu_callback, 0); | 1022 | hotcpu_notifier(workqueue_cpu_callback, 0); |
1017 | keventd_wq = create_workqueue("events"); | 1023 | keventd_wq = create_workqueue("events"); |
1018 | BUG_ON(!keventd_wq); | 1024 | BUG_ON(!keventd_wq); |
1019 | #ifdef CONFIG_SMP | ||
1020 | work_on_cpu_wq = create_workqueue("work_on_cpu"); | ||
1021 | BUG_ON(!work_on_cpu_wq); | ||
1022 | #endif | ||
1023 | } | 1025 | } |