path: root/kernel
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile        |    1
-rw-r--r--  kernel/extable.c       |   25
-rw-r--r--  kernel/lockdep.c       |   16
-rw-r--r--  kernel/panic.c         |  115
-rw-r--r--  kernel/power/disk.c    |    4
-rw-r--r--  kernel/ptrace.c        |    2
-rw-r--r--  kernel/rcupdate.c      |   44
-rw-r--r--  kernel/sched.c         |    2
-rw-r--r--  kernel/slow-work.c     |  640
-rw-r--r--  kernel/smp.c           |  432
-rw-r--r--  kernel/softirq.c       |    6
-rw-r--r--  kernel/sysctl.c        |    9
-rw-r--r--  kernel/trace/Kconfig   |    9
-rw-r--r--  kernel/trace/ftrace.c  |    2
14 files changed, 1028 insertions, 279 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index e4791b3ba55d..bab1dffe37e9 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -93,6 +93,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
+obj-$(CONFIG_SLOW_WORK) += slow-work.o
 
 ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
 # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/extable.c b/kernel/extable.c
index e136ed8d82ba..c46da6a47036 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -41,6 +41,14 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
 	return e;
 }
 
+static inline int init_kernel_text(unsigned long addr)
+{
+	if (addr >= (unsigned long)_sinittext &&
+	    addr <= (unsigned long)_einittext)
+		return 1;
+	return 0;
+}
+
 __notrace_funcgraph int core_kernel_text(unsigned long addr)
 {
 	if (addr >= (unsigned long)_stext &&
@@ -48,8 +56,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr)
 		return 1;
 
 	if (system_state == SYSTEM_BOOTING &&
-	    addr >= (unsigned long)_sinittext &&
-	    addr <= (unsigned long)_einittext)
+	    init_kernel_text(addr))
 		return 1;
 	return 0;
 }
@@ -58,7 +65,19 @@ __notrace_funcgraph int __kernel_text_address(unsigned long addr)
 {
 	if (core_kernel_text(addr))
 		return 1;
-	return __module_text_address(addr) != NULL;
+	if (__module_text_address(addr))
+		return 1;
+	/*
+	 * There might be init symbols in saved stacktraces.
+	 * Give those symbols a chance to be printed in
+	 * backtraces (such as lockdep traces).
+	 *
+	 * Since we are after the module-symbols check, there's
+	 * no danger of address overlap:
+	 */
+	if (init_kernel_text(addr))
+		return 1;
+	return 0;
 }
 
 int kernel_text_address(unsigned long addr)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 3673a3f44d9d..981cd4854281 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -433,13 +433,6 @@ atomic_t nr_find_usage_forwards_checks;
 atomic_t nr_find_usage_forwards_recursions;
 atomic_t nr_find_usage_backwards_checks;
 atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr)	atomic_inc(ptr)
-# define debug_atomic_dec(ptr)	atomic_dec(ptr)
-# define debug_atomic_read(ptr)	atomic_read(ptr)
-#else
-# define debug_atomic_inc(ptr)	do { } while (0)
-# define debug_atomic_dec(ptr)	do { } while (0)
-# define debug_atomic_read(ptr)	0
 #endif
 
 /*
@@ -1900,9 +1893,9 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other,
 		curr->comm, task_pid_nr(curr));
 	print_lock(this);
 	if (forwards)
-		printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass);
+		printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
 	else
-		printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass);
+		printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
 	print_lock_name(other);
 	printk("\n\nand interrupts could create inverse lock ordering between them.\n\n");
 
@@ -2015,7 +2008,8 @@ typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
 			enum lock_usage_bit bit, const char *name);
 
 static int
-mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit)
+mark_lock_irq(struct task_struct *curr, struct held_lock *this,
+	      enum lock_usage_bit new_bit)
 {
 	int excl_bit = exclusive_bit(new_bit);
 	int read = new_bit & 1;
@@ -2043,7 +2037,7 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit)
 	 * states.
 	 */
 	if ((!read || !dir || STRICT_READ_CHECKS) &&
-	    !usage(curr, this, excl_bit, state_name(new_bit)))
+	    !usage(curr, this, excl_bit, state_name(new_bit & ~1)))
 		return 0;
 
 	/*
diff --git a/kernel/panic.c b/kernel/panic.c
index 32fe4eff1b89..3fd8c5bf8b39 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -8,19 +8,19 @@
  * This function is used through-out the kernel (including mm and fs)
  * to indicate a major problem.
  */
+#include <linux/debug_locks.h>
+#include <linux/interrupt.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
 #include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/delay.h>
+#include <linux/random.h>
 #include <linux/reboot.h>
-#include <linux/notifier.h>
-#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/kexec.h>
+#include <linux/sched.h>
 #include <linux/sysrq.h>
-#include <linux/interrupt.h>
+#include <linux/init.h>
 #include <linux/nmi.h>
-#include <linux/kexec.h>
-#include <linux/debug_locks.h>
-#include <linux/random.h>
-#include <linux/kallsyms.h>
 #include <linux/dmi.h>
 
 int panic_on_oops;
@@ -52,19 +52,15 @@ EXPORT_SYMBOL(panic_blink);
  *
  * This function never returns.
  */
-
 NORET_TYPE void panic(const char * fmt, ...)
 {
-	long i;
 	static char buf[1024];
 	va_list args;
-#if defined(CONFIG_S390)
-	unsigned long caller = (unsigned long) __builtin_return_address(0);
-#endif
+	long i;
 
 	/*
-	 * It's possible to come here directly from a panic-assertion and not
-	 * have preempt disabled. Some functions called from here want
+	 * It's possible to come here directly from a panic-assertion and
+	 * not have preempt disabled. Some functions called from here want
 	 * preempt to be disabled. No point enabling it later though...
 	 */
 	preempt_disable();
@@ -77,7 +73,6 @@ NORET_TYPE void panic(const char * fmt, ...)
 #ifdef CONFIG_DEBUG_BUGVERBOSE
 	dump_stack();
 #endif
-	bust_spinlocks(0);
 
 	/*
 	 * If we have crashed and we have a crash kernel loaded let it handle
@@ -86,14 +81,12 @@ NORET_TYPE void panic(const char * fmt, ...)
 	 */
 	crash_kexec(NULL);
 
-#ifdef CONFIG_SMP
 	/*
 	 * Note smp_send_stop is the usual smp shutdown function, which
 	 * unfortunately means it may not be hardened to work in a panic
 	 * situation.
 	 */
 	smp_send_stop();
-#endif
 
 	atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
 
@@ -102,19 +95,21 @@ NORET_TYPE void panic(const char * fmt, ...)
 
 	if (panic_timeout > 0) {
 		/*
 		 * Delay timeout seconds before rebooting the machine.
-		 * We can't use the "normal" timers since we just panicked..
+		 * We can't use the "normal" timers since we just panicked.
 		 */
-		printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout);
+		printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout);
+
 		for (i = 0; i < panic_timeout*1000; ) {
 			touch_nmi_watchdog();
 			i += panic_blink(i);
 			mdelay(1);
 			i++;
 		}
-		/* This will not be a clean reboot, with everything
-		 * shutting down. But if there is a chance of
-		 * rebooting the system it will be rebooted.
+		/*
+		 * This will not be a clean reboot, with everything
+		 * shutting down. But if there is a chance of
+		 * rebooting the system it will be rebooted.
 		 */
 		emergency_restart();
 	}
@@ -127,38 +122,44 @@ NORET_TYPE void panic(const char * fmt, ...)
 	}
 #endif
 #if defined(CONFIG_S390)
-	disabled_wait(caller);
+	{
+		unsigned long caller;
+
+		caller = (unsigned long)__builtin_return_address(0);
+		disabled_wait(caller);
+	}
 #endif
 	local_irq_enable();
-	for (i = 0;;) {
+	for (i = 0; ; ) {
 		touch_softlockup_watchdog();
 		i += panic_blink(i);
 		mdelay(1);
 		i++;
 	}
+	bust_spinlocks(0);
 }
 
 EXPORT_SYMBOL(panic);
 
 
 struct tnt {
 	u8 bit;
 	char true;
 	char false;
 };
 
 static const struct tnt tnts[] = {
 	{ TAINT_PROPRIETARY_MODULE, 'P', 'G' },
 	{ TAINT_FORCED_MODULE, 'F', ' ' },
 	{ TAINT_UNSAFE_SMP, 'S', ' ' },
 	{ TAINT_FORCED_RMMOD, 'R', ' ' },
 	{ TAINT_MACHINE_CHECK, 'M', ' ' },
 	{ TAINT_BAD_PAGE, 'B', ' ' },
 	{ TAINT_USER, 'U', ' ' },
 	{ TAINT_DIE, 'D', ' ' },
 	{ TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' },
 	{ TAINT_WARN, 'W', ' ' },
 	{ TAINT_CRAP, 'C', ' ' },
 };
 
 /**
@@ -195,7 +196,8 @@ const char *print_tainted(void)
 		*s = 0;
 	} else
 		snprintf(buf, sizeof(buf), "Not tainted");
-	return(buf);
+
+	return buf;
 }
 
 int test_taint(unsigned flag)
@@ -211,7 +213,8 @@ unsigned long get_taint(void)
 
 void add_taint(unsigned flag)
 {
-	debug_locks = 0; /* can't trust the integrity of the kernel anymore */
+	/* can't trust the integrity of the kernel anymore: */
+	debug_locks = 0;
 	set_bit(flag, &tainted_mask);
 }
 EXPORT_SYMBOL(add_taint);
@@ -266,8 +269,8 @@ static void do_oops_enter_exit(void)
 }
 
 /*
- * Return true if the calling CPU is allowed to print oops-related info. This
- * is a bit racy..
+ * Return true if the calling CPU is allowed to print oops-related info.
+ * This is a bit racy..
  */
 int oops_may_print(void)
 {
@@ -276,20 +279,22 @@ int oops_may_print(void)
 
 /*
  * Called when the architecture enters its oops handler, before it prints
- * anything. If this is the first CPU to oops, and it's oopsing the first time
- * then let it proceed.
+ * anything. If this is the first CPU to oops, and it's oopsing the first
+ * time then let it proceed.
  *
- * This is all enabled by the pause_on_oops kernel boot option. We do all this
- * to ensure that oopses don't scroll off the screen. It has the side-effect
- * of preventing later-oopsing CPUs from mucking up the display, too.
+ * This is all enabled by the pause_on_oops kernel boot option. We do all
+ * this to ensure that oopses don't scroll off the screen. It has the
+ * side-effect of preventing later-oopsing CPUs from mucking up the display,
+ * too.
  *
- * It turns out that the CPU which is allowed to print ends up pausing for the
- * right duration, whereas all the other CPUs pause for twice as long: once in
- * oops_enter(), once in oops_exit().
+ * It turns out that the CPU which is allowed to print ends up pausing for
+ * the right duration, whereas all the other CPUs pause for twice as long:
+ * once in oops_enter(), once in oops_exit().
  */
 void oops_enter(void)
 {
-	debug_locks_off(); /* can't trust the integrity of the kernel anymore */
+	/* can't trust the integrity of the kernel anymore: */
+	debug_locks_off();
 	do_oops_enter_exit();
 }
 
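(Aside, not part of the patch: panic() above keeps notifying panic_notifier_list via atomic_notifier_call_chain(). As a minimal, hypothetical sketch of the consumer side, using the standard notifier API from <linux/notifier.h>; the my_panic_* names are made up.)

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/init.h>

/* hypothetical example: receive the panic message that panic() passes as buf */
static int my_panic_event(struct notifier_block *nb, unsigned long event, void *buf)
{
	printk(KERN_EMERG "panic notifier fired: %s\n", (char *)buf);
	return NOTIFY_DONE;
}

static struct notifier_block my_panic_block = {
	.notifier_call = my_panic_event,
};

static int __init my_panic_notifier_init(void)
{
	/* panic_notifier_list is the atomic notifier head used in kernel/panic.c */
	atomic_notifier_chain_register(&panic_notifier_list, &my_panic_block);
	return 0;
}
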
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index f3db382c2b2d..5f21ab2bbcdf 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -289,7 +289,7 @@ static int create_image(int platform_mode)
  * hibernation_snapshot - quiesce devices and create the hibernation
  * snapshot image.
  * @platform_mode - if set, use the platform driver, if available, to
- * prepare the platform frimware for the power transition.
+ * prepare the platform firmware for the power transition.
  *
  * Must be called with pm_mutex held
  */
@@ -412,7 +412,7 @@ static int resume_target_kernel(bool platform_mode)
  * hibernation_restore - quiesce devices and restore the hibernation
  * snapshot image. If successful, control returns in hibernation_snaphot()
  * @platform_mode - if set, use the platform driver, if available, to
- * prepare the platform frimware for the transition.
+ * prepare the platform firmware for the transition.
  *
  * Must be called with pm_mutex held
  */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5105f5a6a2ce..aaad0ec34194 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -687,8 +687,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
 		goto out_put_task_struct;
 
 	ret = arch_ptrace(child, request, addr, data);
-	if (ret < 0)
-		goto out_put_task_struct;
 
  out_put_task_struct:
 	put_task_struct(child);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index cae8a059cf47..2c7b8457d0d2 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,6 +122,8 @@ static void rcu_barrier_func(void *type)
 	}
 }
 
+static inline void wait_migrated_callbacks(void);
+
 /*
  * Orchestrate the specified type of RCU barrier, waiting for all
  * RCU callbacks of the specified type to complete.
@@ -147,6 +149,7 @@ static void _rcu_barrier(enum rcu_barrier type)
 		complete(&rcu_barrier_completion);
 	wait_for_completion(&rcu_barrier_completion);
 	mutex_unlock(&rcu_barrier_mutex);
+	wait_migrated_callbacks();
 }
 
 /**
@@ -176,9 +179,50 @@ void rcu_barrier_sched(void)
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
+static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0);
+static struct rcu_head rcu_migrate_head[3];
+static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq);
+
+static void rcu_migrate_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_migrate_type_count))
+		wake_up(&rcu_migrate_wq);
+}
+
+static inline void wait_migrated_callbacks(void)
+{
+	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+}
+
+static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self,
+		unsigned long action, void *hcpu)
+{
+	if (action == CPU_DYING) {
+		/*
+		 * preempt_disable() in on_each_cpu() prevents stop_machine(),
+		 * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);"
+		 * returns, all online cpus have queued rcu_barrier_func(),
+		 * and the dead cpu(if it exist) queues rcu_migrate_callback()s.
+		 *
+		 * These callbacks ensure _rcu_barrier() waits for all
+		 * RCU callbacks of the specified type to complete.
+		 */
+		atomic_set(&rcu_migrate_type_count, 3);
+		call_rcu_bh(rcu_migrate_head, rcu_migrate_callback);
+		call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback);
+		call_rcu(rcu_migrate_head + 2, rcu_migrate_callback);
+	} else if (action == CPU_POST_DEAD) {
+		/* rcu_migrate_head is protected by cpu_add_remove_lock */
+		wait_migrated_callbacks();
+	}
+
+	return NOTIFY_OK;
+}
+
 void __init rcu_init(void)
 {
 	__rcu_init();
+	hotcpu_notifier(rcu_barrier_cpu_hotplug, 0);
 }
 
 void rcu_scheduler_starting(void)
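(For context, not part of the change: the barrier that the hotplug notifier above makes reliable is typically used before freeing callback-backed memory, e.g. on module unload. A minimal sketch; my_exit is hypothetical.)

#include <linux/rcupdate.h>
#include <linux/module.h>

static void __exit my_exit(void)
{
	/* stop queueing new call_rcu() callbacks first ... */

	/* ... then wait until every already-queued callback has run,
	 * including ones migrated away from a CPU that went offline: */
	rcu_barrier();

	/* now it is safe to free the structures those callbacks touch */
}
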
diff --git a/kernel/sched.c b/kernel/sched.c
index 73513f4e19df..2325db2be31b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1110,7 +1110,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
 	if (rq == this_rq()) {
 		hrtimer_restart(timer);
 	} else if (!rq->hrtick_csd_pending) {
-		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
 		rq->hrtick_csd_pending = 1;
 	}
 }
diff --git a/kernel/slow-work.c b/kernel/slow-work.c
new file mode 100644
index 000000000000..cf2bc01186ef
--- /dev/null
+++ b/kernel/slow-work.c
@@ -0,0 +1,640 @@
1/* Worker thread pool for slow items, such as filesystem lookups or mkdirs
2 *
3 * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
10 *
11 * See Documentation/slow-work.txt
12 */
13
14#include <linux/module.h>
15#include <linux/slow-work.h>
16#include <linux/kthread.h>
17#include <linux/freezer.h>
18#include <linux/wait.h>
19
20#define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of
21 * things to do */
22#define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after
23 * OOM */
24
25static void slow_work_cull_timeout(unsigned long);
26static void slow_work_oom_timeout(unsigned long);
27
28#ifdef CONFIG_SYSCTL
29static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *,
30 void __user *, size_t *, loff_t *);
31
32static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *,
33 void __user *, size_t *, loff_t *);
34#endif
35
36/*
37 * The pool of threads has at least min threads in it as long as someone is
38 * using the facility, and may have as many as max.
39 *
40 * A portion of the pool may be processing very slow operations.
41 */
42static unsigned slow_work_min_threads = 2;
43static unsigned slow_work_max_threads = 4;
44static unsigned vslow_work_proportion = 50; /* % of threads that may process
45 * very slow work */
46
47#ifdef CONFIG_SYSCTL
48static const int slow_work_min_min_threads = 2;
49static int slow_work_max_max_threads = 255;
50static const int slow_work_min_vslow = 1;
51static const int slow_work_max_vslow = 99;
52
53ctl_table slow_work_sysctls[] = {
54 {
55 .ctl_name = CTL_UNNUMBERED,
56 .procname = "min-threads",
57 .data = &slow_work_min_threads,
58 .maxlen = sizeof(unsigned),
59 .mode = 0644,
60 .proc_handler = slow_work_min_threads_sysctl,
61 .extra1 = (void *) &slow_work_min_min_threads,
62 .extra2 = &slow_work_max_threads,
63 },
64 {
65 .ctl_name = CTL_UNNUMBERED,
66 .procname = "max-threads",
67 .data = &slow_work_max_threads,
68 .maxlen = sizeof(unsigned),
69 .mode = 0644,
70 .proc_handler = slow_work_max_threads_sysctl,
71 .extra1 = &slow_work_min_threads,
72 .extra2 = (void *) &slow_work_max_max_threads,
73 },
74 {
75 .ctl_name = CTL_UNNUMBERED,
76 .procname = "vslow-percentage",
77 .data = &vslow_work_proportion,
78 .maxlen = sizeof(unsigned),
79 .mode = 0644,
80 .proc_handler = &proc_dointvec_minmax,
81 .extra1 = (void *) &slow_work_min_vslow,
82 .extra2 = (void *) &slow_work_max_vslow,
83 },
84 { .ctl_name = 0 }
85};
86#endif
87
88/*
89 * The active state of the thread pool
90 */
91static atomic_t slow_work_thread_count;
92static atomic_t vslow_work_executing_count;
93
94static bool slow_work_may_not_start_new_thread;
95static bool slow_work_cull; /* cull a thread due to lack of activity */
96static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0);
97static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0);
98static struct slow_work slow_work_new_thread; /* new thread starter */
99
100/*
101 * The queues of work items and the lock governing access to them. These are
102 * shared between all the CPUs. It doesn't make sense to have per-CPU queues
103 * as the number of threads bears no relation to the number of CPUs.
104 *
105 * There are two queues of work items: one for slow work items, and one for
106 * very slow work items.
107 */
108static LIST_HEAD(slow_work_queue);
109static LIST_HEAD(vslow_work_queue);
110static DEFINE_SPINLOCK(slow_work_queue_lock);
111
112/*
113 * The thread controls. A variable used to signal to the threads that they
114 * should exit when the queue is empty, a waitqueue used by the threads to wait
115 * for signals, and a completion set by the last thread to exit.
116 */
117static bool slow_work_threads_should_exit;
118static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq);
119static DECLARE_COMPLETION(slow_work_last_thread_exited);
120
121/*
122 * The number of users of the thread pool and its lock. Whilst this is zero we
123 * have no threads hanging around, and when this reaches zero, we wait for all
124 * active or queued work items to complete and kill all the threads we do have.
125 */
126static int slow_work_user_count;
127static DEFINE_MUTEX(slow_work_user_lock);
128
129/*
130 * Calculate the maximum number of active threads in the pool that are
131 * permitted to process very slow work items.
132 *
133 * The answer is rounded up to at least 1, but may not equal or exceed the
134 * maximum number of the threads in the pool. This means we always have at
135 * least one thread that can process slow work items, and we always have at
136 * least one thread that won't get tied up doing so.
137 */
138static unsigned slow_work_calc_vsmax(void)
139{
140 unsigned vsmax;
141
142 vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion;
143 vsmax /= 100;
144 vsmax = max(vsmax, 1U);
145 return min(vsmax, slow_work_max_threads - 1);
146}
147
148/*
149 * Attempt to execute stuff queued on a slow thread. Return true if we managed
150 * it, false if there was nothing to do.
151 */
152static bool slow_work_execute(void)
153{
154 struct slow_work *work = NULL;
155 unsigned vsmax;
156 bool very_slow;
157
158 vsmax = slow_work_calc_vsmax();
159
160 /* see if we can schedule a new thread to be started if we're not
161 * keeping up with the work */
162 if (!waitqueue_active(&slow_work_thread_wq) &&
163 (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) &&
164 atomic_read(&slow_work_thread_count) < slow_work_max_threads &&
165 !slow_work_may_not_start_new_thread)
166 slow_work_enqueue(&slow_work_new_thread);
167
168 /* find something to execute */
169 spin_lock_irq(&slow_work_queue_lock);
170 if (!list_empty(&vslow_work_queue) &&
171 atomic_read(&vslow_work_executing_count) < vsmax) {
172 work = list_entry(vslow_work_queue.next,
173 struct slow_work, link);
174 if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
175 BUG();
176 list_del_init(&work->link);
177 atomic_inc(&vslow_work_executing_count);
178 very_slow = true;
179 } else if (!list_empty(&slow_work_queue)) {
180 work = list_entry(slow_work_queue.next,
181 struct slow_work, link);
182 if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags))
183 BUG();
184 list_del_init(&work->link);
185 very_slow = false;
186 } else {
187 very_slow = false; /* avoid the compiler warning */
188 }
189 spin_unlock_irq(&slow_work_queue_lock);
190
191 if (!work)
192 return false;
193
194 if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags))
195 BUG();
196
197 work->ops->execute(work);
198
199 if (very_slow)
200 atomic_dec(&vslow_work_executing_count);
201 clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags);
202
203 /* if someone tried to enqueue the item whilst we were executing it,
204 * then it'll be left unenqueued to avoid multiple threads trying to
205 * execute it simultaneously
206 *
207 * there is, however, a race between us testing the pending flag and
208 * getting the spinlock, and between the enqueuer setting the pending
209 * flag and getting the spinlock, so we use a deferral bit to tell us
210 * if the enqueuer got there first
211 */
212 if (test_bit(SLOW_WORK_PENDING, &work->flags)) {
213 spin_lock_irq(&slow_work_queue_lock);
214
215 if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) &&
216 test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags))
217 goto auto_requeue;
218
219 spin_unlock_irq(&slow_work_queue_lock);
220 }
221
222 work->ops->put_ref(work);
223 return true;
224
225auto_requeue:
226 /* we must complete the enqueue operation
227 * - we transfer our ref on the item back to the appropriate queue
228 * - don't wake another thread up as we're awake already
229 */
230 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
231 list_add_tail(&work->link, &vslow_work_queue);
232 else
233 list_add_tail(&work->link, &slow_work_queue);
234 spin_unlock_irq(&slow_work_queue_lock);
235 return true;
236}
237
238/**
239 * slow_work_enqueue - Schedule a slow work item for processing
240 * @work: The work item to queue
241 *
242 * Schedule a slow work item for processing. If the item is already undergoing
243 * execution, this guarantees not to re-enter the execution routine until the
244 * first execution finishes.
245 *
246 * The item is pinned by this function as it retains a reference to it, managed
247 * through the item operations. The item is unpinned once it has been
248 * executed.
249 *
250 * An item may hog the thread that is running it for a relatively large amount
251 * of time, sufficient, for example, to perform several lookup, mkdir, create
252 * and setxattr operations. It may sleep on I/O and may sleep to obtain locks.
253 *
254 * Conversely, if a number of items are awaiting processing, it may take some
255 * time before any given item is given attention. The number of threads in the
256 * pool may be increased to deal with demand, but only up to a limit.
257 *
258 * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in
259 * the very slow queue, from which only a portion of the threads will be
260 * allowed to pick items to execute. This ensures that very slow items won't
261 * overly block ones that are just ordinarily slow.
262 *
263 * Returns 0 if successful, -EAGAIN if not.
264 */
265int slow_work_enqueue(struct slow_work *work)
266{
267 unsigned long flags;
268
269 BUG_ON(slow_work_user_count <= 0);
270 BUG_ON(!work);
271 BUG_ON(!work->ops);
272 BUG_ON(!work->ops->get_ref);
273
274 /* when honouring an enqueue request, we only promise that we will run
275 * the work function in the future; we do not promise to run it once
276 * per enqueue request
277 *
278 * we use the PENDING bit to merge together repeat requests without
279 * having to disable IRQs and take the spinlock, whilst still
280 * maintaining our promise
281 */
282 if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) {
283 spin_lock_irqsave(&slow_work_queue_lock, flags);
284
285 /* we promise that we will not attempt to execute the work
286 * function in more than one thread simultaneously
287 *
288 * this, however, leaves us with a problem if we're asked to
289 * enqueue the work whilst someone is executing the work
290 * function as simply queueing the work immediately means that
291 * another thread may try executing it whilst it is already
292 * under execution
293 *
294 * to deal with this, we set the ENQ_DEFERRED bit instead of
295 * enqueueing, and the thread currently executing the work
296 * function will enqueue the work item when the work function
297 * returns and it has cleared the EXECUTING bit
298 */
299 if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) {
300 set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags);
301 } else {
302 if (work->ops->get_ref(work) < 0)
303 goto cant_get_ref;
304 if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags))
305 list_add_tail(&work->link, &vslow_work_queue);
306 else
307 list_add_tail(&work->link, &slow_work_queue);
308 wake_up(&slow_work_thread_wq);
309 }
310
311 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
312 }
313 return 0;
314
315cant_get_ref:
316 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
317 return -EAGAIN;
318}
319EXPORT_SYMBOL(slow_work_enqueue);
320
321/*
322 * Worker thread culling algorithm
323 */
324static bool slow_work_cull_thread(void)
325{
326 unsigned long flags;
327 bool do_cull = false;
328
329 spin_lock_irqsave(&slow_work_queue_lock, flags);
330
331 if (slow_work_cull) {
332 slow_work_cull = false;
333
334 if (list_empty(&slow_work_queue) &&
335 list_empty(&vslow_work_queue) &&
336 atomic_read(&slow_work_thread_count) >
337 slow_work_min_threads) {
338 mod_timer(&slow_work_cull_timer,
339 jiffies + SLOW_WORK_CULL_TIMEOUT);
340 do_cull = true;
341 }
342 }
343
344 spin_unlock_irqrestore(&slow_work_queue_lock, flags);
345 return do_cull;
346}
347
348/*
349 * Determine if there is slow work available for dispatch
350 */
351static inline bool slow_work_available(int vsmax)
352{
353 return !list_empty(&slow_work_queue) ||
354 (!list_empty(&vslow_work_queue) &&
355 atomic_read(&vslow_work_executing_count) < vsmax);
356}
357
358/*
359 * Worker thread dispatcher
360 */
361static int slow_work_thread(void *_data)
362{
363 int vsmax;
364
365 DEFINE_WAIT(wait);
366
367 set_freezable();
368 set_user_nice(current, -5);
369
370 for (;;) {
371 vsmax = vslow_work_proportion;
372 vsmax *= atomic_read(&slow_work_thread_count);
373 vsmax /= 100;
374
375 prepare_to_wait(&slow_work_thread_wq, &wait,
376 TASK_INTERRUPTIBLE);
377 if (!freezing(current) &&
378 !slow_work_threads_should_exit &&
379 !slow_work_available(vsmax) &&
380 !slow_work_cull)
381 schedule();
382 finish_wait(&slow_work_thread_wq, &wait);
383
384 try_to_freeze();
385
386 vsmax = vslow_work_proportion;
387 vsmax *= atomic_read(&slow_work_thread_count);
388 vsmax /= 100;
389
390 if (slow_work_available(vsmax) && slow_work_execute()) {
391 cond_resched();
392 if (list_empty(&slow_work_queue) &&
393 list_empty(&vslow_work_queue) &&
394 atomic_read(&slow_work_thread_count) >
395 slow_work_min_threads)
396 mod_timer(&slow_work_cull_timer,
397 jiffies + SLOW_WORK_CULL_TIMEOUT);
398 continue;
399 }
400
401 if (slow_work_threads_should_exit)
402 break;
403
404 if (slow_work_cull && slow_work_cull_thread())
405 break;
406 }
407
408 if (atomic_dec_and_test(&slow_work_thread_count))
409 complete_and_exit(&slow_work_last_thread_exited, 0);
410 return 0;
411}
412
413/*
414 * Handle thread cull timer expiration
415 */
416static void slow_work_cull_timeout(unsigned long data)
417{
418 slow_work_cull = true;
419 wake_up(&slow_work_thread_wq);
420}
421
422/*
423 * Get a reference on slow work thread starter
424 */
425static int slow_work_new_thread_get_ref(struct slow_work *work)
426{
427 return 0;
428}
429
430/*
431 * Drop a reference on slow work thread starter
432 */
433static void slow_work_new_thread_put_ref(struct slow_work *work)
434{
435}
436
437/*
438 * Start a new slow work thread
439 */
440static void slow_work_new_thread_execute(struct slow_work *work)
441{
442 struct task_struct *p;
443
444 if (slow_work_threads_should_exit)
445 return;
446
447 if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads)
448 return;
449
450 if (!mutex_trylock(&slow_work_user_lock))
451 return;
452
453 slow_work_may_not_start_new_thread = true;
454 atomic_inc(&slow_work_thread_count);
455 p = kthread_run(slow_work_thread, NULL, "kslowd");
456 if (IS_ERR(p)) {
457 printk(KERN_DEBUG "Slow work thread pool: OOM\n");
458 if (atomic_dec_and_test(&slow_work_thread_count))
459 BUG(); /* we're running on a slow work thread... */
460 mod_timer(&slow_work_oom_timer,
461 jiffies + SLOW_WORK_OOM_TIMEOUT);
462 } else {
463 /* ratelimit the starting of new threads */
464 mod_timer(&slow_work_oom_timer, jiffies + 1);
465 }
466
467 mutex_unlock(&slow_work_user_lock);
468}
469
470static const struct slow_work_ops slow_work_new_thread_ops = {
471 .get_ref = slow_work_new_thread_get_ref,
472 .put_ref = slow_work_new_thread_put_ref,
473 .execute = slow_work_new_thread_execute,
474};
475
476/*
477 * post-OOM new thread start suppression expiration
478 */
479static void slow_work_oom_timeout(unsigned long data)
480{
481 slow_work_may_not_start_new_thread = false;
482}
483
484#ifdef CONFIG_SYSCTL
485/*
486 * Handle adjustment of the minimum number of threads
487 */
488static int slow_work_min_threads_sysctl(struct ctl_table *table, int write,
489 struct file *filp, void __user *buffer,
490 size_t *lenp, loff_t *ppos)
491{
492 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
493 int n;
494
495 if (ret == 0) {
496 mutex_lock(&slow_work_user_lock);
497 if (slow_work_user_count > 0) {
498 /* see if we need to start or stop threads */
499 n = atomic_read(&slow_work_thread_count) -
500 slow_work_min_threads;
501
502 if (n < 0 && !slow_work_may_not_start_new_thread)
503 slow_work_enqueue(&slow_work_new_thread);
504 else if (n > 0)
505 mod_timer(&slow_work_cull_timer,
506 jiffies + SLOW_WORK_CULL_TIMEOUT);
507 }
508 mutex_unlock(&slow_work_user_lock);
509 }
510
511 return ret;
512}
513
514/*
515 * Handle adjustment of the maximum number of threads
516 */
517static int slow_work_max_threads_sysctl(struct ctl_table *table, int write,
518 struct file *filp, void __user *buffer,
519 size_t *lenp, loff_t *ppos)
520{
521 int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos);
522 int n;
523
524 if (ret == 0) {
525 mutex_lock(&slow_work_user_lock);
526 if (slow_work_user_count > 0) {
527 /* see if we need to stop threads */
528 n = slow_work_max_threads -
529 atomic_read(&slow_work_thread_count);
530
531 if (n < 0)
532 mod_timer(&slow_work_cull_timer,
533 jiffies + SLOW_WORK_CULL_TIMEOUT);
534 }
535 mutex_unlock(&slow_work_user_lock);
536 }
537
538 return ret;
539}
540#endif /* CONFIG_SYSCTL */
541
542/**
543 * slow_work_register_user - Register a user of the facility
544 *
545 * Register a user of the facility, starting up the initial threads if there
546 * aren't any other users at this point. This will return 0 if successful, or
547 * an error if not.
548 */
549int slow_work_register_user(void)
550{
551 struct task_struct *p;
552 int loop;
553
554 mutex_lock(&slow_work_user_lock);
555
556 if (slow_work_user_count == 0) {
557 printk(KERN_NOTICE "Slow work thread pool: Starting up\n");
558 init_completion(&slow_work_last_thread_exited);
559
560 slow_work_threads_should_exit = false;
561 slow_work_init(&slow_work_new_thread,
562 &slow_work_new_thread_ops);
563 slow_work_may_not_start_new_thread = false;
564 slow_work_cull = false;
565
566 /* start the minimum number of threads */
567 for (loop = 0; loop < slow_work_min_threads; loop++) {
568 atomic_inc(&slow_work_thread_count);
569 p = kthread_run(slow_work_thread, NULL, "kslowd");
570 if (IS_ERR(p))
571 goto error;
572 }
573 printk(KERN_NOTICE "Slow work thread pool: Ready\n");
574 }
575
576 slow_work_user_count++;
577 mutex_unlock(&slow_work_user_lock);
578 return 0;
579
580error:
581 if (atomic_dec_and_test(&slow_work_thread_count))
582 complete(&slow_work_last_thread_exited);
583 if (loop > 0) {
584 printk(KERN_ERR "Slow work thread pool:"
585 " Aborting startup on ENOMEM\n");
586 slow_work_threads_should_exit = true;
587 wake_up_all(&slow_work_thread_wq);
588 wait_for_completion(&slow_work_last_thread_exited);
589 printk(KERN_ERR "Slow work thread pool: Aborted\n");
590 }
591 mutex_unlock(&slow_work_user_lock);
592 return PTR_ERR(p);
593}
594EXPORT_SYMBOL(slow_work_register_user);
595
596/**
597 * slow_work_unregister_user - Unregister a user of the facility
598 *
599 * Unregister a user of the facility, killing all the threads if this was the
600 * last one.
601 */
602void slow_work_unregister_user(void)
603{
604 mutex_lock(&slow_work_user_lock);
605
606 BUG_ON(slow_work_user_count <= 0);
607
608 slow_work_user_count--;
609 if (slow_work_user_count == 0) {
610 printk(KERN_NOTICE "Slow work thread pool: Shutting down\n");
611 slow_work_threads_should_exit = true;
612 wake_up_all(&slow_work_thread_wq);
613 wait_for_completion(&slow_work_last_thread_exited);
614 printk(KERN_NOTICE "Slow work thread pool:"
615 " Shut down complete\n");
616 }
617
618 del_timer_sync(&slow_work_cull_timer);
619
620 mutex_unlock(&slow_work_user_lock);
621}
622EXPORT_SYMBOL(slow_work_unregister_user);
623
624/*
625 * Initialise the slow work facility
626 */
627static int __init init_slow_work(void)
628{
629 unsigned nr_cpus = num_possible_cpus();
630
631 if (slow_work_max_threads < nr_cpus)
632 slow_work_max_threads = nr_cpus;
633#ifdef CONFIG_SYSCTL
634 if (slow_work_max_max_threads < nr_cpus * 2)
635 slow_work_max_max_threads = nr_cpus * 2;
636#endif
637 return 0;
638}
639
640subsys_initcall(init_slow_work);
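(The file above only contains the pool itself. As a rough sketch of the client side, assuming the slow_work_init() helper and the struct slow_work_ops layout from <linux/slow-work.h> and Documentation/slow-work.txt; all my_* names are illustrative.)

#include <linux/slow-work.h>

static int my_get_ref(struct slow_work *work)
{
	return 0;		/* pin the object that embeds 'work' here */
}

static void my_put_ref(struct slow_work *work)
{
	/* drop the reference taken in my_get_ref() */
}

static void my_execute(struct slow_work *work)
{
	/* runs in a kslowd thread: may sleep, do I/O and take locks */
}

static const struct slow_work_ops my_ops = {
	.get_ref	= my_get_ref,
	.put_ref	= my_put_ref,
	.execute	= my_execute,
};

static struct slow_work my_work;

static int __init my_init(void)
{
	int ret = slow_work_register_user();	/* starts the pool on first use */

	if (ret < 0)
		return ret;

	slow_work_init(&my_work, &my_ops);
	return slow_work_enqueue(&my_work);	/* 0, or -EAGAIN if get_ref fails */
}
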
diff --git a/kernel/smp.c b/kernel/smp.c
index bbedbb7efe32..858baac568ee 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -2,40 +2,82 @@
2 * Generic helpers for smp ipi calls 2 * Generic helpers for smp ipi calls
3 * 3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008 4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 *
6 */ 5 */
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/rcupdate.h> 6#include <linux/rcupdate.h>
11#include <linux/rculist.h> 7#include <linux/rculist.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/percpu.h>
11#include <linux/init.h>
12#include <linux/smp.h> 12#include <linux/smp.h>
13#include <linux/cpu.h>
13 14
14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); 15static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
15static LIST_HEAD(call_function_queue); 16
16__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); 17static struct {
18 struct list_head queue;
19 spinlock_t lock;
20} call_function __cacheline_aligned_in_smp =
21 {
22 .queue = LIST_HEAD_INIT(call_function.queue),
23 .lock = __SPIN_LOCK_UNLOCKED(call_function.lock),
24 };
17 25
18enum { 26enum {
19 CSD_FLAG_WAIT = 0x01, 27 CSD_FLAG_LOCK = 0x01,
20 CSD_FLAG_ALLOC = 0x02,
21 CSD_FLAG_LOCK = 0x04,
22}; 28};
23 29
24struct call_function_data { 30struct call_function_data {
25 struct call_single_data csd; 31 struct call_single_data csd;
26 spinlock_t lock; 32 spinlock_t lock;
27 unsigned int refs; 33 unsigned int refs;
28 struct rcu_head rcu_head; 34 cpumask_var_t cpumask;
29 unsigned long cpumask_bits[];
30}; 35};
31 36
32struct call_single_queue { 37struct call_single_queue {
33 struct list_head list; 38 struct list_head list;
34 spinlock_t lock; 39 spinlock_t lock;
40};
41
42static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
43 .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
44};
45
46static int
47hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
48{
49 long cpu = (long)hcpu;
50 struct call_function_data *cfd = &per_cpu(cfd_data, cpu);
51
52 switch (action) {
53 case CPU_UP_PREPARE:
54 case CPU_UP_PREPARE_FROZEN:
55 if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
56 cpu_to_node(cpu)))
57 return NOTIFY_BAD;
58 break;
59
60#ifdef CONFIG_CPU_HOTPLUG
61 case CPU_UP_CANCELED:
62 case CPU_UP_CANCELED_FROZEN:
63
64 case CPU_DEAD:
65 case CPU_DEAD_FROZEN:
66 free_cpumask_var(cfd->cpumask);
67 break;
68#endif
69 };
70
71 return NOTIFY_OK;
72}
73
74static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
75 .notifier_call = hotplug_cfd,
35}; 76};
36 77
37static int __cpuinit init_call_single_data(void) 78static int __cpuinit init_call_single_data(void)
38{ 79{
80 void *cpu = (void *)(long)smp_processor_id();
39 int i; 81 int i;
40 82
41 for_each_possible_cpu(i) { 83 for_each_possible_cpu(i) {
@@ -44,29 +86,63 @@ static int __cpuinit init_call_single_data(void)
44 spin_lock_init(&q->lock); 86 spin_lock_init(&q->lock);
45 INIT_LIST_HEAD(&q->list); 87 INIT_LIST_HEAD(&q->list);
46 } 88 }
89
90 hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
91 register_cpu_notifier(&hotplug_cfd_notifier);
92
47 return 0; 93 return 0;
48} 94}
49early_initcall(init_call_single_data); 95early_initcall(init_call_single_data);
50 96
51static void csd_flag_wait(struct call_single_data *data) 97/*
98 * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
99 *
100 * For non-synchronous ipi calls the csd can still be in use by the
101 * previous function call. For multi-cpu calls its even more interesting
102 * as we'll have to ensure no other cpu is observing our csd.
103 */
104static void csd_lock_wait(struct call_single_data *data)
52{ 105{
53 /* Wait for response */ 106 while (data->flags & CSD_FLAG_LOCK)
54 do {
55 if (!(data->flags & CSD_FLAG_WAIT))
56 break;
57 cpu_relax(); 107 cpu_relax();
58 } while (1); 108}
109
110static void csd_lock(struct call_single_data *data)
111{
112 csd_lock_wait(data);
113 data->flags = CSD_FLAG_LOCK;
114
115 /*
116 * prevent CPU from reordering the above assignment
117 * to ->flags with any subsequent assignments to other
118 * fields of the specified call_single_data structure:
119 */
120 smp_mb();
121}
122
123static void csd_unlock(struct call_single_data *data)
124{
125 WARN_ON(!(data->flags & CSD_FLAG_LOCK));
126
127 /*
128 * ensure we're all done before releasing data:
129 */
130 smp_mb();
131
132 data->flags &= ~CSD_FLAG_LOCK;
59} 133}
60 134
61/* 135/*
62 * Insert a previously allocated call_single_data element for execution 136 * Insert a previously allocated call_single_data element
63 * on the given CPU. data must already have ->func, ->info, and ->flags set. 137 * for execution on the given CPU. data must already have
138 * ->func, ->info, and ->flags set.
64 */ 139 */
65static void generic_exec_single(int cpu, struct call_single_data *data) 140static
141void generic_exec_single(int cpu, struct call_single_data *data, int wait)
66{ 142{
67 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); 143 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
68 int wait = data->flags & CSD_FLAG_WAIT, ipi;
69 unsigned long flags; 144 unsigned long flags;
145 int ipi;
70 146
71 spin_lock_irqsave(&dst->lock, flags); 147 spin_lock_irqsave(&dst->lock, flags);
72 ipi = list_empty(&dst->list); 148 ipi = list_empty(&dst->list);
@@ -74,24 +150,21 @@ static void generic_exec_single(int cpu, struct call_single_data *data)
74 spin_unlock_irqrestore(&dst->lock, flags); 150 spin_unlock_irqrestore(&dst->lock, flags);
75 151
76 /* 152 /*
77 * Make the list addition visible before sending the ipi. 153 * The list addition should be visible before sending the IPI
154 * handler locks the list to pull the entry off it because of
155 * normal cache coherency rules implied by spinlocks.
156 *
157 * If IPIs can go out of order to the cache coherency protocol
158 * in an architecture, sufficient synchronisation should be added
159 * to arch code to make it appear to obey cache coherency WRT
160 * locking and barrier primitives. Generic code isn't really
161 * equipped to do the right thing...
78 */ 162 */
79 smp_mb();
80
81 if (ipi) 163 if (ipi)
82 arch_send_call_function_single_ipi(cpu); 164 arch_send_call_function_single_ipi(cpu);
83 165
84 if (wait) 166 if (wait)
85 csd_flag_wait(data); 167 csd_lock_wait(data);
86}
87
88static void rcu_free_call_data(struct rcu_head *head)
89{
90 struct call_function_data *data;
91
92 data = container_of(head, struct call_function_data, rcu_head);
93
94 kfree(data);
95} 168}
96 169
97/* 170/*
@@ -104,99 +177,83 @@ void generic_smp_call_function_interrupt(void)
104 int cpu = get_cpu(); 177 int cpu = get_cpu();
105 178
106 /* 179 /*
107 * It's ok to use list_for_each_rcu() here even though we may delete 180 * Ensure entry is visible on call_function_queue after we have
108 * 'pos', since list_del_rcu() doesn't clear ->next 181 * entered the IPI. See comment in smp_call_function_many.
182 * If we don't have this, then we may miss an entry on the list
183 * and never get another IPI to process it.
184 */
185 smp_mb();
186
187 /*
188 * It's ok to use list_for_each_rcu() here even though we may
189 * delete 'pos', since list_del_rcu() doesn't clear ->next
109 */ 190 */
110 rcu_read_lock(); 191 list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
111 list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
112 int refs; 192 int refs;
113 193
114 if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) 194 spin_lock(&data->lock);
195 if (!cpumask_test_cpu(cpu, data->cpumask)) {
196 spin_unlock(&data->lock);
115 continue; 197 continue;
198 }
199 cpumask_clear_cpu(cpu, data->cpumask);
200 spin_unlock(&data->lock);
116 201
117 data->csd.func(data->csd.info); 202 data->csd.func(data->csd.info);
118 203
119 spin_lock(&data->lock); 204 spin_lock(&data->lock);
120 cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits));
121 WARN_ON(data->refs == 0); 205 WARN_ON(data->refs == 0);
122 data->refs--; 206 refs = --data->refs;
123 refs = data->refs; 207 if (!refs) {
208 spin_lock(&call_function.lock);
209 list_del_rcu(&data->csd.list);
210 spin_unlock(&call_function.lock);
211 }
124 spin_unlock(&data->lock); 212 spin_unlock(&data->lock);
125 213
126 if (refs) 214 if (refs)
127 continue; 215 continue;
128 216
129 spin_lock(&call_function_lock); 217 csd_unlock(&data->csd);
130 list_del_rcu(&data->csd.list);
131 spin_unlock(&call_function_lock);
132
133 if (data->csd.flags & CSD_FLAG_WAIT) {
134 /*
135 * serialize stores to data with the flag clear
136 * and wakeup
137 */
138 smp_wmb();
139 data->csd.flags &= ~CSD_FLAG_WAIT;
140 }
141 if (data->csd.flags & CSD_FLAG_ALLOC)
142 call_rcu(&data->rcu_head, rcu_free_call_data);
143 } 218 }
144 rcu_read_unlock();
145 219
146 put_cpu(); 220 put_cpu();
147} 221}
148 222
149/* 223/*
150 * Invoked by arch to handle an IPI for call function single. Must be called 224 * Invoked by arch to handle an IPI for call function single. Must be
151 * from the arch with interrupts disabled. 225 * called from the arch with interrupts disabled.
152 */ 226 */
153void generic_smp_call_function_single_interrupt(void) 227void generic_smp_call_function_single_interrupt(void)
154{ 228{
155 struct call_single_queue *q = &__get_cpu_var(call_single_queue); 229 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
230 unsigned int data_flags;
156 LIST_HEAD(list); 231 LIST_HEAD(list);
157 232
158 /* 233 spin_lock(&q->lock);
159 * Need to see other stores to list head for checking whether 234 list_replace_init(&q->list, &list);
160 * list is empty without holding q->lock 235 spin_unlock(&q->lock);
161 */ 236
162 smp_read_barrier_depends(); 237 while (!list_empty(&list)) {
163 while (!list_empty(&q->list)) { 238 struct call_single_data *data;
164 unsigned int data_flags; 239
165 240 data = list_entry(list.next, struct call_single_data, list);
166 spin_lock(&q->lock); 241 list_del(&data->list);
167 list_replace_init(&q->list, &list); 242
168 spin_unlock(&q->lock); 243 /*
169 244 * 'data' can be invalid after this call if flags == 0
170 while (!list_empty(&list)) { 245 * (when called through generic_exec_single()),
171 struct call_single_data *data; 246 * so save them away before making the call:
172 247 */
173 data = list_entry(list.next, struct call_single_data, 248 data_flags = data->flags;
174 list); 249
175 list_del(&data->list); 250 data->func(data->info);
176 251
177 /*
178 * 'data' can be invalid after this call if
179 * flags == 0 (when called through
180 * generic_exec_single(), so save them away before
181 * making the call.
182 */
183 data_flags = data->flags;
184
185 data->func(data->info);
186
187 if (data_flags & CSD_FLAG_WAIT) {
188 smp_wmb();
189 data->flags &= ~CSD_FLAG_WAIT;
190 } else if (data_flags & CSD_FLAG_LOCK) {
191 smp_wmb();
192 data->flags &= ~CSD_FLAG_LOCK;
193 } else if (data_flags & CSD_FLAG_ALLOC)
194 kfree(data);
195 }
196 /* 252 /*
197 * See comment on outer loop 253 * Unlocked CSDs are valid through generic_exec_single():
198 */ 254 */
199 smp_read_barrier_depends(); 255 if (data_flags & CSD_FLAG_LOCK)
256 csd_unlock(data);
200 } 257 }
201} 258}
202 259
@@ -215,65 +272,45 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data);
215int smp_call_function_single(int cpu, void (*func) (void *info), void *info, 272int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
216 int wait) 273 int wait)
217{ 274{
218 struct call_single_data d; 275 struct call_single_data d = {
276 .flags = 0,
277 };
219 unsigned long flags; 278 unsigned long flags;
220 /* prevent preemption and reschedule on another processor, 279 int this_cpu;
221 as well as CPU removal */
222 int me = get_cpu();
223 int err = 0; 280 int err = 0;
224 281
282 /*
283 * prevent preemption and reschedule on another processor,
284 * as well as CPU removal
285 */
286 this_cpu = get_cpu();
287
225 /* Can deadlock when called with interrupts disabled */ 288 /* Can deadlock when called with interrupts disabled */
226 WARN_ON(irqs_disabled()); 289 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
227 290
228 if (cpu == me) { 291 if (cpu == this_cpu) {
229 local_irq_save(flags); 292 local_irq_save(flags);
230 func(info); 293 func(info);
231 local_irq_restore(flags); 294 local_irq_restore(flags);
232 } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { 295 } else {
233 struct call_single_data *data; 296 if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
297 struct call_single_data *data = &d;
298
299 if (!wait)
300 data = &__get_cpu_var(csd_data);
234 301
235 if (!wait) { 302 csd_lock(data);
236 /* 303
237 * We are calling a function on a single CPU 304 data->func = func;
238 * and we are not going to wait for it to finish. 305 data->info = info;
239 * We first try to allocate the data, but if we 306 generic_exec_single(cpu, data, wait);
240 * fail, we fall back to use a per cpu data to pass
241 * the information to that CPU. Since all callers
242 * of this code will use the same data, we must
243 * synchronize the callers to prevent a new caller
244 * from corrupting the data before the callee
245 * can access it.
246 *
247 * The CSD_FLAG_LOCK is used to let us know when
248 * the IPI handler is done with the data.
249 * The first caller will set it, and the callee
250 * will clear it. The next caller must wait for
251 * it to clear before we set it again. This
252 * will make sure the callee is done with the
253 * data before a new caller will use it.
254 */
255 data = kmalloc(sizeof(*data), GFP_ATOMIC);
256 if (data)
257 data->flags = CSD_FLAG_ALLOC;
258 else {
259 data = &per_cpu(csd_data, me);
260 while (data->flags & CSD_FLAG_LOCK)
261 cpu_relax();
262 data->flags = CSD_FLAG_LOCK;
263 }
264 } else { 307 } else {
265 data = &d; 308 err = -ENXIO; /* CPU not online */
266 data->flags = CSD_FLAG_WAIT;
267 } 309 }
268
269 data->func = func;
270 data->info = info;
271 generic_exec_single(cpu, data);
272 } else {
273 err = -ENXIO; /* CPU not online */
274 } 310 }
275 311
276 put_cpu(); 312 put_cpu();
313
277 return err; 314 return err;
278} 315}
279EXPORT_SYMBOL(smp_call_function_single); 316EXPORT_SYMBOL(smp_call_function_single);
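(A hedged usage sketch of the smp_call_function_single() interface reworked in the hunk above; the callback and CPU number are made up.)

#include <linux/smp.h>

static void remote_tick(void *info)
{
	/* runs on the target CPU from the IPI handler: must not sleep */
	unsigned long *count = info;

	(*count)++;
}

static void example(void)
{
	unsigned long ticks = 0;
	int err;

	/* run remote_tick() on CPU 1 and wait for it to complete */
	err = smp_call_function_single(1, remote_tick, &ticks, 1);
	if (err == -ENXIO)
		printk(KERN_INFO "CPU 1 is not online\n");
}
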
@@ -283,23 +320,26 @@ EXPORT_SYMBOL(smp_call_function_single);
283 * @cpu: The CPU to run on. 320 * @cpu: The CPU to run on.
284 * @data: Pre-allocated and setup data structure 321 * @data: Pre-allocated and setup data structure
285 * 322 *
286 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated 323 * Like smp_call_function_single(), but allow caller to pass in a
287 * data structure. Useful for embedding @data inside other structures, for 324 * pre-allocated data structure. Useful for embedding @data inside
288 * instance. 325 * other structures, for instance.
289 *
290 */ 326 */
291void __smp_call_function_single(int cpu, struct call_single_data *data) 327void __smp_call_function_single(int cpu, struct call_single_data *data,
328 int wait)
292{ 329{
330 csd_lock(data);
331
293 /* Can deadlock when called with interrupts disabled */ 332 /* Can deadlock when called with interrupts disabled */
294 WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled()); 333 WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress);
295 334
296 generic_exec_single(cpu, data); 335 generic_exec_single(cpu, data, wait);
297} 336}
298 337
299/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ 338/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
339
300#ifndef arch_send_call_function_ipi_mask 340#ifndef arch_send_call_function_ipi_mask
301#define arch_send_call_function_ipi_mask(maskp) \ 341# define arch_send_call_function_ipi_mask(maskp) \
302 arch_send_call_function_ipi(*(maskp)) 342 arch_send_call_function_ipi(*(maskp))
303#endif 343#endif
304 344
305/** 345/**
@@ -307,7 +347,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
307 * @mask: The set of cpus to run on (only runs on online subset). 347 * @mask: The set of cpus to run on (only runs on online subset).
308 * @func: The function to run. This must be fast and non-blocking. 348 * @func: The function to run. This must be fast and non-blocking.
309 * @info: An arbitrary pointer to pass to the function. 349 * @info: An arbitrary pointer to pass to the function.
310 * @wait: If true, wait (atomically) until function has completed on other CPUs. 350 * @wait: If true, wait (atomically) until function has completed
351 * on other CPUs.
311 * 352 *
312 * If @wait is true, then returns once @func has returned. Note that @wait 353 * If @wait is true, then returns once @func has returned. Note that @wait
313 * will be implicitly turned on in case of allocation failures, since 354 * will be implicitly turned on in case of allocation failures, since
@@ -318,27 +359,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data)
318 * must be disabled when calling this function. 359 * must be disabled when calling this function.
319 */ 360 */
320void smp_call_function_many(const struct cpumask *mask, 361void smp_call_function_many(const struct cpumask *mask,
321 void (*func)(void *), void *info, 362 void (*func)(void *), void *info, bool wait)
322 bool wait)
323{ 363{
324 struct call_function_data *data; 364 struct call_function_data *data;
325 unsigned long flags; 365 unsigned long flags;
326 int cpu, next_cpu; 366 int cpu, next_cpu, this_cpu = smp_processor_id();
327 367
328 /* Can deadlock when called with interrupts disabled */ 368 /* Can deadlock when called with interrupts disabled */
329 WARN_ON(irqs_disabled()); 369 WARN_ON_ONCE(irqs_disabled() && !oops_in_progress);
330 370
331 /* So, what's a CPU they want? Ignoring this one. */ 371 /* So, what's a CPU they want? Ignoring this one. */
332 cpu = cpumask_first_and(mask, cpu_online_mask); 372 cpu = cpumask_first_and(mask, cpu_online_mask);
333 if (cpu == smp_processor_id()) 373 if (cpu == this_cpu)
334 cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 374 cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
375
335 /* No online cpus? We're done. */ 376 /* No online cpus? We're done. */
336 if (cpu >= nr_cpu_ids) 377 if (cpu >= nr_cpu_ids)
337 return; 378 return;
338 379
339 /* Do we have another CPU which isn't us? */ 380 /* Do we have another CPU which isn't us? */
340 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); 381 next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
341 if (next_cpu == smp_processor_id()) 382 if (next_cpu == this_cpu)
342 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); 383 next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);
343 384
344 /* Fastpath: do that cpu by itself. */ 385 /* Fastpath: do that cpu by itself. */
@@ -347,43 +388,40 @@ void smp_call_function_many(const struct cpumask *mask,
347 return; 388 return;
348 } 389 }
349 390
350 data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); 391 data = &__get_cpu_var(cfd_data);
351 if (unlikely(!data)) { 392 csd_lock(&data->csd);
352 /* Slow path. */
353 for_each_online_cpu(cpu) {
354 if (cpu == smp_processor_id())
355 continue;
356 if (cpumask_test_cpu(cpu, mask))
357 smp_call_function_single(cpu, func, info, wait);
358 }
359 return;
360 }
361 393
362 spin_lock_init(&data->lock); 394 spin_lock_irqsave(&data->lock, flags);
363 data->csd.flags = CSD_FLAG_ALLOC;
364 if (wait)
365 data->csd.flags |= CSD_FLAG_WAIT;
366 data->csd.func = func; 395 data->csd.func = func;
367 data->csd.info = info; 396 data->csd.info = info;
368 cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); 397 cpumask_and(data->cpumask, mask, cpu_online_mask);
369 cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); 398 cpumask_clear_cpu(this_cpu, data->cpumask);
370 data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); 399 data->refs = cpumask_weight(data->cpumask);
371 400
372 spin_lock_irqsave(&call_function_lock, flags); 401 spin_lock(&call_function.lock);
373 list_add_tail_rcu(&data->csd.list, &call_function_queue); 402 /*
374 spin_unlock_irqrestore(&call_function_lock, flags); 403 * Place entry at the _HEAD_ of the list, so that any cpu still
404 * observing the entry in generic_smp_call_function_interrupt()
405 * will not miss any other list entries:
406 */
407 list_add_rcu(&data->csd.list, &call_function.queue);
408 spin_unlock(&call_function.lock);
409
410 spin_unlock_irqrestore(&data->lock, flags);
375 411
376 /* 412 /*
377 * Make the list addition visible before sending the ipi. 413 * Make the list addition visible before sending the ipi.
414 * (IPIs must obey or appear to obey normal Linux cache
415 * coherency rules -- see comment in generic_exec_single).
378 */ 416 */
379 smp_mb(); 417 smp_mb();
380 418
381 /* Send a message to all CPUs in the map */ 419 /* Send a message to all CPUs in the map */
382 arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits)); 420 arch_send_call_function_ipi_mask(data->cpumask);
383 421
384 /* optionally wait for the CPUs to complete */ 422 /* Optionally wait for the CPUs to complete */
385 if (wait) 423 if (wait)
386 csd_flag_wait(&data->csd); 424 csd_lock_wait(&data->csd);
387} 425}
388EXPORT_SYMBOL(smp_call_function_many); 426EXPORT_SYMBOL(smp_call_function_many);
389 427
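Two details in this hunk carry the correctness argument: new entries go on the head of call_function.queue, so a CPU already walking the list for an earlier IPI cannot step past them, and the smp_mb() orders the list insertion before the IPI is sent. A simplified sketch of the consuming side that this ordering protects (abbreviated; the real interrupt handler also clears this CPU's bit in the entry's cpumask under the per-entry lock, which is elided here):

	/*
	 * Each CPU walks the shared queue under RCU and only acts on entries
	 * whose cpumask includes it.  The last CPU to finish an entry unhooks
	 * it and releases the embedded csd back to its owner.
	 */
	void generic_smp_call_function_interrupt(void)
	{
		struct call_function_data *data;
		int cpu = smp_processor_id();

		list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
			int refs;

			/* (real code also clears this CPU's bit here) */
			if (!cpumask_test_cpu(cpu, data->cpumask))
				continue;

			data->csd.func(data->csd.info);

			spin_lock(&data->lock);
			refs = --data->refs;
			spin_unlock(&data->lock);

			if (refs)
				continue;

			spin_lock(&call_function.lock);
			list_del_rcu(&data->csd.list);
			spin_unlock(&call_function.lock);

			/* Lets the owning CPU reuse its per-cpu cfd_data. */
			csd_unlock(&data->csd);
		}
	}

Because the sender now reuses a single per-cpu cfd_data instead of a kmalloc()ed copy, the csd_lock(&data->csd) at the top of the hunk is what throttles back-to-back smp_call_function_many() calls from the same CPU.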
@@ -391,7 +429,8 @@ EXPORT_SYMBOL(smp_call_function_many);
391 * smp_call_function(): Run a function on all other CPUs. 429 * smp_call_function(): Run a function on all other CPUs.
392 * @func: The function to run. This must be fast and non-blocking. 430 * @func: The function to run. This must be fast and non-blocking.
393 * @info: An arbitrary pointer to pass to the function. 431 * @info: An arbitrary pointer to pass to the function.
394 * @wait: If true, wait (atomically) until function has completed on other CPUs. 432 * @wait: If true, wait (atomically) until function has completed
433 * on other CPUs.
395 * 434 *
396 * Returns 0. 435 * Returns 0.
397 * 436 *
@@ -407,26 +446,27 @@ int smp_call_function(void (*func)(void *), void *info, int wait)
407 preempt_disable(); 446 preempt_disable();
408 smp_call_function_many(cpu_online_mask, func, info, wait); 447 smp_call_function_many(cpu_online_mask, func, info, wait);
409 preempt_enable(); 448 preempt_enable();
449
410 return 0; 450 return 0;
411} 451}
412EXPORT_SYMBOL(smp_call_function); 452EXPORT_SYMBOL(smp_call_function);
413 453
414void ipi_call_lock(void) 454void ipi_call_lock(void)
415{ 455{
416 spin_lock(&call_function_lock); 456 spin_lock(&call_function.lock);
417} 457}
418 458
419void ipi_call_unlock(void) 459void ipi_call_unlock(void)
420{ 460{
421 spin_unlock(&call_function_lock); 461 spin_unlock(&call_function.lock);
422} 462}
423 463
424void ipi_call_lock_irq(void) 464void ipi_call_lock_irq(void)
425{ 465{
426 spin_lock_irq(&call_function_lock); 466 spin_lock_irq(&call_function.lock);
427} 467}
428 468
429void ipi_call_unlock_irq(void) 469void ipi_call_unlock_irq(void)
430{ 470{
431 spin_unlock_irq(&call_function_lock); 471 spin_unlock_irq(&call_function.lock);
432} 472}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 57d3f67f6f38..ea23ec087ee9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -180,7 +180,7 @@ asmlinkage void __do_softirq(void)
180 account_system_vtime(current); 180 account_system_vtime(current);
181 181
182 __local_bh_disable((unsigned long)__builtin_return_address(0)); 182 __local_bh_disable((unsigned long)__builtin_return_address(0));
183 trace_softirq_enter(); 183 lockdep_softirq_enter();
184 184
185 cpu = smp_processor_id(); 185 cpu = smp_processor_id();
186restart: 186restart:
@@ -220,7 +220,7 @@ restart:
220 if (pending) 220 if (pending)
221 wakeup_softirqd(); 221 wakeup_softirqd();
222 222
223 trace_softirq_exit(); 223 lockdep_softirq_exit();
224 224
225 account_system_vtime(current); 225 account_system_vtime(current);
226 _local_bh_enable(); 226 _local_bh_enable();
@@ -496,7 +496,7 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir
496 cp->flags = 0; 496 cp->flags = 0;
497 cp->priv = softirq; 497 cp->priv = softirq;
498 498
499 __smp_call_function_single(cpu, cp); 499 __smp_call_function_single(cpu, cp, 0);
500 return 0; 500 return 0;
501 } 501 }
502 return 1; 502 return 1;
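The extra argument makes the fire-and-forget case explicit at this call site: the softirq code owns cp, so it queues it with wait == 0 and never blocks. A hedged sketch of the embedding pattern __smp_call_function_single() is meant for (structure and function names below are made up for illustration, not taken from the patch):

	/* Hypothetical owner structure embedding its own call_single_data. */
	struct remote_kick {
		struct call_single_data	csd;
		int			payload;
	};

	static void remote_kick_handler(void *info)
	{
		struct remote_kick *kick = info;

		printk(KERN_INFO "remote kick: payload=%d\n", kick->payload);
	}

	static void remote_kick_cpu(struct remote_kick *kick, int cpu)
	{
		kick->csd.flags	= 0;
		kick->csd.func	= remote_kick_handler;
		kick->csd.info	= kick;

		/* wait == 0: queue the csd, send the IPI, return immediately. */
		__smp_call_function_single(cpu, &kick->csd, 0);
	}

The owner must keep kick alive until the handler has run; with wait == 0 nothing in this path blocks for that.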
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ec4543dfc06..82350f8f04f6 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -48,6 +48,7 @@
48#include <linux/acpi.h> 48#include <linux/acpi.h>
49#include <linux/reboot.h> 49#include <linux/reboot.h>
50#include <linux/ftrace.h> 50#include <linux/ftrace.h>
51#include <linux/slow-work.h>
51 52
52#include <asm/uaccess.h> 53#include <asm/uaccess.h>
53#include <asm/processor.h> 54#include <asm/processor.h>
@@ -897,6 +898,14 @@ static struct ctl_table kern_table[] = {
897 .proc_handler = &scan_unevictable_handler, 898 .proc_handler = &scan_unevictable_handler,
898 }, 899 },
899#endif 900#endif
901#ifdef CONFIG_SLOW_WORK
902 {
903 .ctl_name = CTL_UNNUMBERED,
904 .procname = "slow-work",
905 .mode = 0555,
906 .child = slow_work_sysctls,
907 },
908#endif
900/* 909/*
901 * NOTE: do not add new entries to this table unless you have read 910 * NOTE: do not add new entries to this table unless you have read
902 * Documentation/sysctl/ctl_unnumbered.txt 911 * Documentation/sysctl/ctl_unnumbered.txt
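The "slow-work" entry is registered as a directory (.mode = 0555) whose contents come from a child table defined in kernel/slow-work.c rather than here. For reference, a child ctl_table follows the same shape as the entries above; a minimal sketch, with entry and variable names chosen for illustration rather than taken from the patch:

	static struct ctl_table slow_work_sysctls[] = {
		{
			.ctl_name	= CTL_UNNUMBERED,
			.procname	= "min-threads",
			.data		= &slow_work_min_threads,
			.maxlen		= sizeof(unsigned),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{ .ctl_name = 0 }	/* terminator */
	};

Hooking such a table in under the kern_table entry above yields a browsable /proc/sys/kernel/slow-work/ directory whose individual knobs keep their own permissions.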
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 34e707e5ab87..504086ab4443 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -72,11 +72,10 @@ config FUNCTION_GRAPH_TRACER
72 help 72 help
73 Enable the kernel to trace a function at both its return 73 Enable the kernel to trace a function at both its return
74 and its entry. 74 and its entry.
75 It's first purpose is to trace the duration of functions and 75 Its first purpose is to trace the duration of functions and
76 draw a call graph for each thread with some informations like 76 draw a call graph for each thread with some information like
77 the return value. 77 the return value. This is done by setting the current return
78 This is done by setting the current return address on the current 78 address on the current task structure into a stack of calls.
79 task structure into a stack of calls.
80 79
81config IRQSOFF_TRACER 80config IRQSOFF_TRACER
82 bool "Interrupts-off Latency Tracer" 81 bool "Interrupts-off Latency Tracer"
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fdf913dfc7e8..53e8c8bc0c98 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1908,7 +1908,7 @@ int register_ftrace_function(struct ftrace_ops *ops)
1908} 1908}
1909 1909
1910/** 1910/**
1911 * unregister_ftrace_function - unresgister a function for profiling. 1911 * unregister_ftrace_function - unregister a function for profiling.
1912 * @ops - ops structure that holds the function to unregister 1912 * @ops - ops structure that holds the function to unregister
1913 * 1913 *
1914 * Unregister a function that was added to be called by ftrace profiling. 1914 * Unregister a function that was added to be called by ftrace profiling.