diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/extable.c | 25 | ||||
-rw-r--r-- | kernel/lockdep.c | 16 | ||||
-rw-r--r-- | kernel/panic.c | 115 | ||||
-rw-r--r-- | kernel/power/disk.c | 4 | ||||
-rw-r--r-- | kernel/ptrace.c | 2 | ||||
-rw-r--r-- | kernel/rcupdate.c | 44 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/slow-work.c | 640 | ||||
-rw-r--r-- | kernel/smp.c | 432 | ||||
-rw-r--r-- | kernel/softirq.c | 6 | ||||
-rw-r--r-- | kernel/sysctl.c | 9 | ||||
-rw-r--r-- | kernel/trace/Kconfig | 9 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 2 |
14 files changed, 1028 insertions, 279 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index e4791b3ba55d..bab1dffe37e9 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -93,6 +93,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o | |||
93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ | 93 | obj-$(CONFIG_FUNCTION_TRACER) += trace/ |
94 | obj-$(CONFIG_TRACING) += trace/ | 94 | obj-$(CONFIG_TRACING) += trace/ |
95 | obj-$(CONFIG_SMP) += sched_cpupri.o | 95 | obj-$(CONFIG_SMP) += sched_cpupri.o |
96 | obj-$(CONFIG_SLOW_WORK) += slow-work.o | ||
96 | 97 | ||
97 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) | 98 | ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) |
98 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is | 99 | # According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is |
diff --git a/kernel/extable.c b/kernel/extable.c index e136ed8d82ba..c46da6a47036 100644 --- a/kernel/extable.c +++ b/kernel/extable.c | |||
@@ -41,6 +41,14 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) | |||
41 | return e; | 41 | return e; |
42 | } | 42 | } |
43 | 43 | ||
44 | static inline int init_kernel_text(unsigned long addr) | ||
45 | { | ||
46 | if (addr >= (unsigned long)_sinittext && | ||
47 | addr <= (unsigned long)_einittext) | ||
48 | return 1; | ||
49 | return 0; | ||
50 | } | ||
51 | |||
44 | __notrace_funcgraph int core_kernel_text(unsigned long addr) | 52 | __notrace_funcgraph int core_kernel_text(unsigned long addr) |
45 | { | 53 | { |
46 | if (addr >= (unsigned long)_stext && | 54 | if (addr >= (unsigned long)_stext && |
@@ -48,8 +56,7 @@ __notrace_funcgraph int core_kernel_text(unsigned long addr) | |||
48 | return 1; | 56 | return 1; |
49 | 57 | ||
50 | if (system_state == SYSTEM_BOOTING && | 58 | if (system_state == SYSTEM_BOOTING && |
51 | addr >= (unsigned long)_sinittext && | 59 | init_kernel_text(addr)) |
52 | addr <= (unsigned long)_einittext) | ||
53 | return 1; | 60 | return 1; |
54 | return 0; | 61 | return 0; |
55 | } | 62 | } |
@@ -58,7 +65,19 @@ __notrace_funcgraph int __kernel_text_address(unsigned long addr) | |||
58 | { | 65 | { |
59 | if (core_kernel_text(addr)) | 66 | if (core_kernel_text(addr)) |
60 | return 1; | 67 | return 1; |
61 | return __module_text_address(addr) != NULL; | 68 | if (__module_text_address(addr)) |
69 | return 1; | ||
70 | /* | ||
71 | * There might be init symbols in saved stacktraces. | ||
72 | * Give those symbols a chance to be printed in | ||
73 | * backtraces (such as lockdep traces). | ||
74 | * | ||
75 | * Since we are after the module-symbols check, there's | ||
76 | * no danger of address overlap: | ||
77 | */ | ||
78 | if (init_kernel_text(addr)) | ||
79 | return 1; | ||
80 | return 0; | ||
62 | } | 81 | } |
63 | 82 | ||
64 | int kernel_text_address(unsigned long addr) | 83 | int kernel_text_address(unsigned long addr) |
diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 3673a3f44d9d..981cd4854281 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c | |||
@@ -433,13 +433,6 @@ atomic_t nr_find_usage_forwards_checks; | |||
433 | atomic_t nr_find_usage_forwards_recursions; | 433 | atomic_t nr_find_usage_forwards_recursions; |
434 | atomic_t nr_find_usage_backwards_checks; | 434 | atomic_t nr_find_usage_backwards_checks; |
435 | atomic_t nr_find_usage_backwards_recursions; | 435 | atomic_t nr_find_usage_backwards_recursions; |
436 | # define debug_atomic_inc(ptr) atomic_inc(ptr) | ||
437 | # define debug_atomic_dec(ptr) atomic_dec(ptr) | ||
438 | # define debug_atomic_read(ptr) atomic_read(ptr) | ||
439 | #else | ||
440 | # define debug_atomic_inc(ptr) do { } while (0) | ||
441 | # define debug_atomic_dec(ptr) do { } while (0) | ||
442 | # define debug_atomic_read(ptr) 0 | ||
443 | #endif | 436 | #endif |
444 | 437 | ||
445 | /* | 438 | /* |
@@ -1900,9 +1893,9 @@ print_irq_inversion_bug(struct task_struct *curr, struct lock_class *other, | |||
1900 | curr->comm, task_pid_nr(curr)); | 1893 | curr->comm, task_pid_nr(curr)); |
1901 | print_lock(this); | 1894 | print_lock(this); |
1902 | if (forwards) | 1895 | if (forwards) |
1903 | printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); | 1896 | printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); |
1904 | else | 1897 | else |
1905 | printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); | 1898 | printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); |
1906 | print_lock_name(other); | 1899 | print_lock_name(other); |
1907 | printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); | 1900 | printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); |
1908 | 1901 | ||
@@ -2015,7 +2008,8 @@ typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, | |||
2015 | enum lock_usage_bit bit, const char *name); | 2008 | enum lock_usage_bit bit, const char *name); |
2016 | 2009 | ||
2017 | static int | 2010 | static int |
2018 | mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit) | 2011 | mark_lock_irq(struct task_struct *curr, struct held_lock *this, |
2012 | enum lock_usage_bit new_bit) | ||
2019 | { | 2013 | { |
2020 | int excl_bit = exclusive_bit(new_bit); | 2014 | int excl_bit = exclusive_bit(new_bit); |
2021 | int read = new_bit & 1; | 2015 | int read = new_bit & 1; |
@@ -2043,7 +2037,7 @@ mark_lock_irq(struct task_struct *curr, struct held_lock *this, int new_bit) | |||
2043 | * states. | 2037 | * states. |
2044 | */ | 2038 | */ |
2045 | if ((!read || !dir || STRICT_READ_CHECKS) && | 2039 | if ((!read || !dir || STRICT_READ_CHECKS) && |
2046 | !usage(curr, this, excl_bit, state_name(new_bit))) | 2040 | !usage(curr, this, excl_bit, state_name(new_bit & ~1))) |
2047 | return 0; | 2041 | return 0; |
2048 | 2042 | ||
2049 | /* | 2043 | /* |
diff --git a/kernel/panic.c b/kernel/panic.c index 32fe4eff1b89..3fd8c5bf8b39 100644 --- a/kernel/panic.c +++ b/kernel/panic.c | |||
@@ -8,19 +8,19 @@ | |||
8 | * This function is used through-out the kernel (including mm and fs) | 8 | * This function is used through-out the kernel (including mm and fs) |
9 | * to indicate a major problem. | 9 | * to indicate a major problem. |
10 | */ | 10 | */ |
11 | #include <linux/debug_locks.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/notifier.h> | ||
11 | #include <linux/module.h> | 15 | #include <linux/module.h> |
12 | #include <linux/sched.h> | 16 | #include <linux/random.h> |
13 | #include <linux/delay.h> | ||
14 | #include <linux/reboot.h> | 17 | #include <linux/reboot.h> |
15 | #include <linux/notifier.h> | 18 | #include <linux/delay.h> |
16 | #include <linux/init.h> | 19 | #include <linux/kexec.h> |
20 | #include <linux/sched.h> | ||
17 | #include <linux/sysrq.h> | 21 | #include <linux/sysrq.h> |
18 | #include <linux/interrupt.h> | 22 | #include <linux/init.h> |
19 | #include <linux/nmi.h> | 23 | #include <linux/nmi.h> |
20 | #include <linux/kexec.h> | ||
21 | #include <linux/debug_locks.h> | ||
22 | #include <linux/random.h> | ||
23 | #include <linux/kallsyms.h> | ||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | 25 | ||
26 | int panic_on_oops; | 26 | int panic_on_oops; |
@@ -52,19 +52,15 @@ EXPORT_SYMBOL(panic_blink); | |||
52 | * | 52 | * |
53 | * This function never returns. | 53 | * This function never returns. |
54 | */ | 54 | */ |
55 | |||
56 | NORET_TYPE void panic(const char * fmt, ...) | 55 | NORET_TYPE void panic(const char * fmt, ...) |
57 | { | 56 | { |
58 | long i; | ||
59 | static char buf[1024]; | 57 | static char buf[1024]; |
60 | va_list args; | 58 | va_list args; |
61 | #if defined(CONFIG_S390) | 59 | long i; |
62 | unsigned long caller = (unsigned long) __builtin_return_address(0); | ||
63 | #endif | ||
64 | 60 | ||
65 | /* | 61 | /* |
66 | * It's possible to come here directly from a panic-assertion and not | 62 | * It's possible to come here directly from a panic-assertion and |
67 | * have preempt disabled. Some functions called from here want | 63 | * not have preempt disabled. Some functions called from here want |
68 | * preempt to be disabled. No point enabling it later though... | 64 | * preempt to be disabled. No point enabling it later though... |
69 | */ | 65 | */ |
70 | preempt_disable(); | 66 | preempt_disable(); |
@@ -77,7 +73,6 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
77 | #ifdef CONFIG_DEBUG_BUGVERBOSE | 73 | #ifdef CONFIG_DEBUG_BUGVERBOSE |
78 | dump_stack(); | 74 | dump_stack(); |
79 | #endif | 75 | #endif |
80 | bust_spinlocks(0); | ||
81 | 76 | ||
82 | /* | 77 | /* |
83 | * If we have crashed and we have a crash kernel loaded let it handle | 78 | * If we have crashed and we have a crash kernel loaded let it handle |
@@ -86,14 +81,12 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
86 | */ | 81 | */ |
87 | crash_kexec(NULL); | 82 | crash_kexec(NULL); |
88 | 83 | ||
89 | #ifdef CONFIG_SMP | ||
90 | /* | 84 | /* |
91 | * Note smp_send_stop is the usual smp shutdown function, which | 85 | * Note smp_send_stop is the usual smp shutdown function, which |
92 | * unfortunately means it may not be hardened to work in a panic | 86 | * unfortunately means it may not be hardened to work in a panic |
93 | * situation. | 87 | * situation. |
94 | */ | 88 | */ |
95 | smp_send_stop(); | 89 | smp_send_stop(); |
96 | #endif | ||
97 | 90 | ||
98 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); | 91 | atomic_notifier_call_chain(&panic_notifier_list, 0, buf); |
99 | 92 | ||
@@ -102,19 +95,21 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
102 | 95 | ||
103 | if (panic_timeout > 0) { | 96 | if (panic_timeout > 0) { |
104 | /* | 97 | /* |
105 | * Delay timeout seconds before rebooting the machine. | 98 | * Delay timeout seconds before rebooting the machine. |
106 | * We can't use the "normal" timers since we just panicked.. | 99 | * We can't use the "normal" timers since we just panicked. |
107 | */ | 100 | */ |
108 | printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); | 101 | printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout); |
102 | |||
109 | for (i = 0; i < panic_timeout*1000; ) { | 103 | for (i = 0; i < panic_timeout*1000; ) { |
110 | touch_nmi_watchdog(); | 104 | touch_nmi_watchdog(); |
111 | i += panic_blink(i); | 105 | i += panic_blink(i); |
112 | mdelay(1); | 106 | mdelay(1); |
113 | i++; | 107 | i++; |
114 | } | 108 | } |
115 | /* This will not be a clean reboot, with everything | 109 | /* |
116 | * shutting down. But if there is a chance of | 110 | * This will not be a clean reboot, with everything |
117 | * rebooting the system it will be rebooted. | 111 | * shutting down. But if there is a chance of |
112 | * rebooting the system it will be rebooted. | ||
118 | */ | 113 | */ |
119 | emergency_restart(); | 114 | emergency_restart(); |
120 | } | 115 | } |
@@ -127,38 +122,44 @@ NORET_TYPE void panic(const char * fmt, ...) | |||
127 | } | 122 | } |
128 | #endif | 123 | #endif |
129 | #if defined(CONFIG_S390) | 124 | #if defined(CONFIG_S390) |
130 | disabled_wait(caller); | 125 | { |
126 | unsigned long caller; | ||
127 | |||
128 | caller = (unsigned long)__builtin_return_address(0); | ||
129 | disabled_wait(caller); | ||
130 | } | ||
131 | #endif | 131 | #endif |
132 | local_irq_enable(); | 132 | local_irq_enable(); |
133 | for (i = 0;;) { | 133 | for (i = 0; ; ) { |
134 | touch_softlockup_watchdog(); | 134 | touch_softlockup_watchdog(); |
135 | i += panic_blink(i); | 135 | i += panic_blink(i); |
136 | mdelay(1); | 136 | mdelay(1); |
137 | i++; | 137 | i++; |
138 | } | 138 | } |
139 | bust_spinlocks(0); | ||
139 | } | 140 | } |
140 | 141 | ||
141 | EXPORT_SYMBOL(panic); | 142 | EXPORT_SYMBOL(panic); |
142 | 143 | ||
143 | 144 | ||
144 | struct tnt { | 145 | struct tnt { |
145 | u8 bit; | 146 | u8 bit; |
146 | char true; | 147 | char true; |
147 | char false; | 148 | char false; |
148 | }; | 149 | }; |
149 | 150 | ||
150 | static const struct tnt tnts[] = { | 151 | static const struct tnt tnts[] = { |
151 | { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, | 152 | { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, |
152 | { TAINT_FORCED_MODULE, 'F', ' ' }, | 153 | { TAINT_FORCED_MODULE, 'F', ' ' }, |
153 | { TAINT_UNSAFE_SMP, 'S', ' ' }, | 154 | { TAINT_UNSAFE_SMP, 'S', ' ' }, |
154 | { TAINT_FORCED_RMMOD, 'R', ' ' }, | 155 | { TAINT_FORCED_RMMOD, 'R', ' ' }, |
155 | { TAINT_MACHINE_CHECK, 'M', ' ' }, | 156 | { TAINT_MACHINE_CHECK, 'M', ' ' }, |
156 | { TAINT_BAD_PAGE, 'B', ' ' }, | 157 | { TAINT_BAD_PAGE, 'B', ' ' }, |
157 | { TAINT_USER, 'U', ' ' }, | 158 | { TAINT_USER, 'U', ' ' }, |
158 | { TAINT_DIE, 'D', ' ' }, | 159 | { TAINT_DIE, 'D', ' ' }, |
159 | { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' }, | 160 | { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' }, |
160 | { TAINT_WARN, 'W', ' ' }, | 161 | { TAINT_WARN, 'W', ' ' }, |
161 | { TAINT_CRAP, 'C', ' ' }, | 162 | { TAINT_CRAP, 'C', ' ' }, |
162 | }; | 163 | }; |
163 | 164 | ||
164 | /** | 165 | /** |
@@ -195,7 +196,8 @@ const char *print_tainted(void) | |||
195 | *s = 0; | 196 | *s = 0; |
196 | } else | 197 | } else |
197 | snprintf(buf, sizeof(buf), "Not tainted"); | 198 | snprintf(buf, sizeof(buf), "Not tainted"); |
198 | return(buf); | 199 | |
200 | return buf; | ||
199 | } | 201 | } |
200 | 202 | ||
201 | int test_taint(unsigned flag) | 203 | int test_taint(unsigned flag) |
@@ -211,7 +213,8 @@ unsigned long get_taint(void) | |||
211 | 213 | ||
212 | void add_taint(unsigned flag) | 214 | void add_taint(unsigned flag) |
213 | { | 215 | { |
214 | debug_locks = 0; /* can't trust the integrity of the kernel anymore */ | 216 | /* can't trust the integrity of the kernel anymore: */ |
217 | debug_locks = 0; | ||
215 | set_bit(flag, &tainted_mask); | 218 | set_bit(flag, &tainted_mask); |
216 | } | 219 | } |
217 | EXPORT_SYMBOL(add_taint); | 220 | EXPORT_SYMBOL(add_taint); |
@@ -266,8 +269,8 @@ static void do_oops_enter_exit(void) | |||
266 | } | 269 | } |
267 | 270 | ||
268 | /* | 271 | /* |
269 | * Return true if the calling CPU is allowed to print oops-related info. This | 272 | * Return true if the calling CPU is allowed to print oops-related info. |
270 | * is a bit racy.. | 273 | * This is a bit racy.. |
271 | */ | 274 | */ |
272 | int oops_may_print(void) | 275 | int oops_may_print(void) |
273 | { | 276 | { |
@@ -276,20 +279,22 @@ int oops_may_print(void) | |||
276 | 279 | ||
277 | /* | 280 | /* |
278 | * Called when the architecture enters its oops handler, before it prints | 281 | * Called when the architecture enters its oops handler, before it prints |
279 | * anything. If this is the first CPU to oops, and it's oopsing the first time | 282 | * anything. If this is the first CPU to oops, and it's oopsing the first |
280 | * then let it proceed. | 283 | * time then let it proceed. |
281 | * | 284 | * |
282 | * This is all enabled by the pause_on_oops kernel boot option. We do all this | 285 | * This is all enabled by the pause_on_oops kernel boot option. We do all |
283 | * to ensure that oopses don't scroll off the screen. It has the side-effect | 286 | * this to ensure that oopses don't scroll off the screen. It has the |
284 | * of preventing later-oopsing CPUs from mucking up the display, too. | 287 | * side-effect of preventing later-oopsing CPUs from mucking up the display, |
288 | * too. | ||
285 | * | 289 | * |
286 | * It turns out that the CPU which is allowed to print ends up pausing for the | 290 | * It turns out that the CPU which is allowed to print ends up pausing for |
287 | * right duration, whereas all the other CPUs pause for twice as long: once in | 291 | * the right duration, whereas all the other CPUs pause for twice as long: |
288 | * oops_enter(), once in oops_exit(). | 292 | * once in oops_enter(), once in oops_exit(). |
289 | */ | 293 | */ |
290 | void oops_enter(void) | 294 | void oops_enter(void) |
291 | { | 295 | { |
292 | debug_locks_off(); /* can't trust the integrity of the kernel anymore */ | 296 | /* can't trust the integrity of the kernel anymore: */ |
297 | debug_locks_off(); | ||
293 | do_oops_enter_exit(); | 298 | do_oops_enter_exit(); |
294 | } | 299 | } |
295 | 300 | ||
diff --git a/kernel/power/disk.c b/kernel/power/disk.c index f3db382c2b2d..5f21ab2bbcdf 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c | |||
@@ -289,7 +289,7 @@ static int create_image(int platform_mode) | |||
289 | * hibernation_snapshot - quiesce devices and create the hibernation | 289 | * hibernation_snapshot - quiesce devices and create the hibernation |
290 | * snapshot image. | 290 | * snapshot image. |
291 | * @platform_mode - if set, use the platform driver, if available, to | 291 | * @platform_mode - if set, use the platform driver, if available, to |
292 | * prepare the platform frimware for the power transition. | 292 | * prepare the platform firmware for the power transition. |
293 | * | 293 | * |
294 | * Must be called with pm_mutex held | 294 | * Must be called with pm_mutex held |
295 | */ | 295 | */ |
@@ -412,7 +412,7 @@ static int resume_target_kernel(bool platform_mode) | |||
412 | * hibernation_restore - quiesce devices and restore the hibernation | 412 | * hibernation_restore - quiesce devices and restore the hibernation |
413 | * snapshot image. If successful, control returns in hibernation_snapshot() | 413 | * snapshot image. If successful, control returns in hibernation_snapshot() |
414 | * @platform_mode - if set, use the platform driver, if available, to | 414 | * @platform_mode - if set, use the platform driver, if available, to |
415 | * prepare the platform frimware for the transition. | 415 | * prepare the platform firmware for the transition. |
416 | * | 416 | * |
417 | * Must be called with pm_mutex held | 417 | * Must be called with pm_mutex held |
418 | */ | 418 | */ |
diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 5105f5a6a2ce..aaad0ec34194 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c | |||
@@ -687,8 +687,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) | |||
687 | goto out_put_task_struct; | 687 | goto out_put_task_struct; |
688 | 688 | ||
689 | ret = arch_ptrace(child, request, addr, data); | 689 | ret = arch_ptrace(child, request, addr, data); |
690 | if (ret < 0) | ||
691 | goto out_put_task_struct; | ||
692 | 690 | ||
693 | out_put_task_struct: | 691 | out_put_task_struct: |
694 | put_task_struct(child); | 692 | put_task_struct(child); |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index cae8a059cf47..2c7b8457d0d2 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
@@ -122,6 +122,8 @@ static void rcu_barrier_func(void *type) | |||
122 | } | 122 | } |
123 | } | 123 | } |
124 | 124 | ||
125 | static inline void wait_migrated_callbacks(void); | ||
126 | |||
125 | /* | 127 | /* |
126 | * Orchestrate the specified type of RCU barrier, waiting for all | 128 | * Orchestrate the specified type of RCU barrier, waiting for all |
127 | * RCU callbacks of the specified type to complete. | 129 | * RCU callbacks of the specified type to complete. |
@@ -147,6 +149,7 @@ static void _rcu_barrier(enum rcu_barrier type) | |||
147 | complete(&rcu_barrier_completion); | 149 | complete(&rcu_barrier_completion); |
148 | wait_for_completion(&rcu_barrier_completion); | 150 | wait_for_completion(&rcu_barrier_completion); |
149 | mutex_unlock(&rcu_barrier_mutex); | 151 | mutex_unlock(&rcu_barrier_mutex); |
152 | wait_migrated_callbacks(); | ||
150 | } | 153 | } |
151 | 154 | ||
152 | /** | 155 | /** |
@@ -176,9 +179,50 @@ void rcu_barrier_sched(void) | |||
176 | } | 179 | } |
177 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 180 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
178 | 181 | ||
182 | static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); | ||
183 | static struct rcu_head rcu_migrate_head[3]; | ||
184 | static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); | ||
185 | |||
186 | static void rcu_migrate_callback(struct rcu_head *notused) | ||
187 | { | ||
188 | if (atomic_dec_and_test(&rcu_migrate_type_count)) | ||
189 | wake_up(&rcu_migrate_wq); | ||
190 | } | ||
191 | |||
192 | static inline void wait_migrated_callbacks(void) | ||
193 | { | ||
194 | wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); | ||
195 | } | ||
196 | |||
197 | static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, | ||
198 | unsigned long action, void *hcpu) | ||
199 | { | ||
200 | if (action == CPU_DYING) { | ||
201 | /* | ||
202 | * preempt_disable() in on_each_cpu() prevents stop_machine(), | ||
203 | * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" | ||
204 | * returns, all online cpus have queued rcu_barrier_func(), | ||
205 | * and the dead cpu (if it exists) queues rcu_migrate_callback()s. | ||
206 | * | ||
207 | * These callbacks ensure _rcu_barrier() waits for all | ||
208 | * RCU callbacks of the specified type to complete. | ||
209 | */ | ||
210 | atomic_set(&rcu_migrate_type_count, 3); | ||
211 | call_rcu_bh(rcu_migrate_head, rcu_migrate_callback); | ||
212 | call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback); | ||
213 | call_rcu(rcu_migrate_head + 2, rcu_migrate_callback); | ||
214 | } else if (action == CPU_POST_DEAD) { | ||
215 | /* rcu_migrate_head is protected by cpu_add_remove_lock */ | ||
216 | wait_migrated_callbacks(); | ||
217 | } | ||
218 | |||
219 | return NOTIFY_OK; | ||
220 | } | ||
221 | |||
179 | void __init rcu_init(void) | 222 | void __init rcu_init(void) |
180 | { | 223 | { |
181 | __rcu_init(); | 224 | __rcu_init(); |
225 | hotcpu_notifier(rcu_barrier_cpu_hotplug, 0); | ||
182 | } | 226 | } |
183 | 227 | ||
184 | void rcu_scheduler_starting(void) | 228 | void rcu_scheduler_starting(void) |
diff --git a/kernel/sched.c b/kernel/sched.c index 73513f4e19df..2325db2be31b 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1110,7 +1110,7 @@ static void hrtick_start(struct rq *rq, u64 delay) | |||
1110 | if (rq == this_rq()) { | 1110 | if (rq == this_rq()) { |
1111 | hrtimer_restart(timer); | 1111 | hrtimer_restart(timer); |
1112 | } else if (!rq->hrtick_csd_pending) { | 1112 | } else if (!rq->hrtick_csd_pending) { |
1113 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); | 1113 | __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); |
1114 | rq->hrtick_csd_pending = 1; | 1114 | rq->hrtick_csd_pending = 1; |
1115 | } | 1115 | } |
1116 | } | 1116 | } |
diff --git a/kernel/slow-work.c b/kernel/slow-work.c new file mode 100644 index 000000000000..cf2bc01186ef --- /dev/null +++ b/kernel/slow-work.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs | ||
2 | * | ||
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | ||
4 | * Written by David Howells (dhowells@redhat.com) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public Licence | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the Licence, or (at your option) any later version. | ||
10 | * | ||
11 | * See Documentation/slow-work.txt | ||
12 | */ | ||
13 | |||
14 | #include <linux/module.h> | ||
15 | #include <linux/slow-work.h> | ||
16 | #include <linux/kthread.h> | ||
17 | #include <linux/freezer.h> | ||
18 | #include <linux/wait.h> | ||
19 | |||
20 | #define SLOW_WORK_CULL_TIMEOUT (5 * HZ) /* cull threads 5s after running out of | ||
21 | * things to do */ | ||
22 | #define SLOW_WORK_OOM_TIMEOUT (5 * HZ) /* can't start new threads for 5s after | ||
23 | * OOM */ | ||
24 | |||
25 | static void slow_work_cull_timeout(unsigned long); | ||
26 | static void slow_work_oom_timeout(unsigned long); | ||
27 | |||
28 | #ifdef CONFIG_SYSCTL | ||
29 | static int slow_work_min_threads_sysctl(struct ctl_table *, int, struct file *, | ||
30 | void __user *, size_t *, loff_t *); | ||
31 | |||
32 | static int slow_work_max_threads_sysctl(struct ctl_table *, int , struct file *, | ||
33 | void __user *, size_t *, loff_t *); | ||
34 | #endif | ||
35 | |||
36 | /* | ||
37 | * The pool of threads has at least min threads in it as long as someone is | ||
38 | * using the facility, and may have as many as max. | ||
39 | * | ||
40 | * A portion of the pool may be processing very slow operations. | ||
41 | */ | ||
42 | static unsigned slow_work_min_threads = 2; | ||
43 | static unsigned slow_work_max_threads = 4; | ||
44 | static unsigned vslow_work_proportion = 50; /* % of threads that may process | ||
45 | * very slow work */ | ||
46 | |||
47 | #ifdef CONFIG_SYSCTL | ||
48 | static const int slow_work_min_min_threads = 2; | ||
49 | static int slow_work_max_max_threads = 255; | ||
50 | static const int slow_work_min_vslow = 1; | ||
51 | static const int slow_work_max_vslow = 99; | ||
52 | |||
53 | ctl_table slow_work_sysctls[] = { | ||
54 | { | ||
55 | .ctl_name = CTL_UNNUMBERED, | ||
56 | .procname = "min-threads", | ||
57 | .data = &slow_work_min_threads, | ||
58 | .maxlen = sizeof(unsigned), | ||
59 | .mode = 0644, | ||
60 | .proc_handler = slow_work_min_threads_sysctl, | ||
61 | .extra1 = (void *) &slow_work_min_min_threads, | ||
62 | .extra2 = &slow_work_max_threads, | ||
63 | }, | ||
64 | { | ||
65 | .ctl_name = CTL_UNNUMBERED, | ||
66 | .procname = "max-threads", | ||
67 | .data = &slow_work_max_threads, | ||
68 | .maxlen = sizeof(unsigned), | ||
69 | .mode = 0644, | ||
70 | .proc_handler = slow_work_max_threads_sysctl, | ||
71 | .extra1 = &slow_work_min_threads, | ||
72 | .extra2 = (void *) &slow_work_max_max_threads, | ||
73 | }, | ||
74 | { | ||
75 | .ctl_name = CTL_UNNUMBERED, | ||
76 | .procname = "vslow-percentage", | ||
77 | .data = &vslow_work_proportion, | ||
78 | .maxlen = sizeof(unsigned), | ||
79 | .mode = 0644, | ||
80 | .proc_handler = &proc_dointvec_minmax, | ||
81 | .extra1 = (void *) &slow_work_min_vslow, | ||
82 | .extra2 = (void *) &slow_work_max_vslow, | ||
83 | }, | ||
84 | { .ctl_name = 0 } | ||
85 | }; | ||
86 | #endif | ||
87 | |||
88 | /* | ||
89 | * The active state of the thread pool | ||
90 | */ | ||
91 | static atomic_t slow_work_thread_count; | ||
92 | static atomic_t vslow_work_executing_count; | ||
93 | |||
94 | static bool slow_work_may_not_start_new_thread; | ||
95 | static bool slow_work_cull; /* cull a thread due to lack of activity */ | ||
96 | static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0); | ||
97 | static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0); | ||
98 | static struct slow_work slow_work_new_thread; /* new thread starter */ | ||
99 | |||
100 | /* | ||
101 | * The queues of work items and the lock governing access to them. These are | ||
102 | * shared between all the CPUs. It doesn't make sense to have per-CPU queues | ||
103 | * as the number of threads bears no relation to the number of CPUs. | ||
104 | * | ||
105 | * There are two queues of work items: one for slow work items, and one for | ||
106 | * very slow work items. | ||
107 | */ | ||
108 | static LIST_HEAD(slow_work_queue); | ||
109 | static LIST_HEAD(vslow_work_queue); | ||
110 | static DEFINE_SPINLOCK(slow_work_queue_lock); | ||
111 | |||
112 | /* | ||
113 | * The thread controls. A variable used to signal to the threads that they | ||
114 | * should exit when the queue is empty, a waitqueue used by the threads to wait | ||
115 | * for signals, and a completion set by the last thread to exit. | ||
116 | */ | ||
117 | static bool slow_work_threads_should_exit; | ||
118 | static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq); | ||
119 | static DECLARE_COMPLETION(slow_work_last_thread_exited); | ||
120 | |||
121 | /* | ||
122 | * The number of users of the thread pool and its lock. Whilst this is zero we | ||
123 | * have no threads hanging around, and when this reaches zero, we wait for all | ||
124 | * active or queued work items to complete and kill all the threads we do have. | ||
125 | */ | ||
126 | static int slow_work_user_count; | ||
127 | static DEFINE_MUTEX(slow_work_user_lock); | ||
128 | |||
129 | /* | ||
130 | * Calculate the maximum number of active threads in the pool that are | ||
131 | * permitted to process very slow work items. | ||
132 | * | ||
133 | * The answer is rounded up to at least 1, but may not equal or exceed the | ||
134 | * maximum number of the threads in the pool. This means we always have at | ||
135 | * least one thread that can process slow work items, and we always have at | ||
136 | * least one thread that won't get tied up doing so. | ||
137 | */ | ||
138 | static unsigned slow_work_calc_vsmax(void) | ||
139 | { | ||
140 | unsigned vsmax; | ||
141 | |||
142 | vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion; | ||
143 | vsmax /= 100; | ||
144 | vsmax = max(vsmax, 1U); | ||
145 | return min(vsmax, slow_work_max_threads - 1); | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * Attempt to execute stuff queued on a slow thread. Return true if we managed | ||
150 | * it, false if there was nothing to do. | ||
151 | */ | ||
152 | static bool slow_work_execute(void) | ||
153 | { | ||
154 | struct slow_work *work = NULL; | ||
155 | unsigned vsmax; | ||
156 | bool very_slow; | ||
157 | |||
158 | vsmax = slow_work_calc_vsmax(); | ||
159 | |||
160 | /* see if we can schedule a new thread to be started if we're not | ||
161 | * keeping up with the work */ | ||
162 | if (!waitqueue_active(&slow_work_thread_wq) && | ||
163 | (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) && | ||
164 | atomic_read(&slow_work_thread_count) < slow_work_max_threads && | ||
165 | !slow_work_may_not_start_new_thread) | ||
166 | slow_work_enqueue(&slow_work_new_thread); | ||
167 | |||
168 | /* find something to execute */ | ||
169 | spin_lock_irq(&slow_work_queue_lock); | ||
170 | if (!list_empty(&vslow_work_queue) && | ||
171 | atomic_read(&vslow_work_executing_count) < vsmax) { | ||
172 | work = list_entry(vslow_work_queue.next, | ||
173 | struct slow_work, link); | ||
174 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | ||
175 | BUG(); | ||
176 | list_del_init(&work->link); | ||
177 | atomic_inc(&vslow_work_executing_count); | ||
178 | very_slow = true; | ||
179 | } else if (!list_empty(&slow_work_queue)) { | ||
180 | work = list_entry(slow_work_queue.next, | ||
181 | struct slow_work, link); | ||
182 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | ||
183 | BUG(); | ||
184 | list_del_init(&work->link); | ||
185 | very_slow = false; | ||
186 | } else { | ||
187 | very_slow = false; /* avoid the compiler warning */ | ||
188 | } | ||
189 | spin_unlock_irq(&slow_work_queue_lock); | ||
190 | |||
191 | if (!work) | ||
192 | return false; | ||
193 | |||
194 | if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) | ||
195 | BUG(); | ||
196 | |||
197 | work->ops->execute(work); | ||
198 | |||
199 | if (very_slow) | ||
200 | atomic_dec(&vslow_work_executing_count); | ||
201 | clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); | ||
202 | |||
203 | /* if someone tried to enqueue the item whilst we were executing it, | ||
204 | * then it'll be left unenqueued to avoid multiple threads trying to | ||
205 | * execute it simultaneously | ||
206 | * | ||
207 | * there is, however, a race between us testing the pending flag and | ||
208 | * getting the spinlock, and between the enqueuer setting the pending | ||
209 | * flag and getting the spinlock, so we use a deferral bit to tell us | ||
210 | * if the enqueuer got there first | ||
211 | */ | ||
212 | if (test_bit(SLOW_WORK_PENDING, &work->flags)) { | ||
213 | spin_lock_irq(&slow_work_queue_lock); | ||
214 | |||
215 | if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) && | ||
216 | test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) | ||
217 | goto auto_requeue; | ||
218 | |||
219 | spin_unlock_irq(&slow_work_queue_lock); | ||
220 | } | ||
221 | |||
222 | work->ops->put_ref(work); | ||
223 | return true; | ||
224 | |||
225 | auto_requeue: | ||
226 | /* we must complete the enqueue operation | ||
227 | * - we transfer our ref on the item back to the appropriate queue | ||
228 | * - don't wake another thread up as we're awake already | ||
229 | */ | ||
230 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) | ||
231 | list_add_tail(&work->link, &vslow_work_queue); | ||
232 | else | ||
233 | list_add_tail(&work->link, &slow_work_queue); | ||
234 | spin_unlock_irq(&slow_work_queue_lock); | ||
235 | return true; | ||
236 | } | ||
237 | |||
238 | /** | ||
239 | * slow_work_enqueue - Schedule a slow work item for processing | ||
240 | * @work: The work item to queue | ||
241 | * | ||
242 | * Schedule a slow work item for processing. If the item is already undergoing | ||
243 | * execution, this guarantees not to re-enter the execution routine until the | ||
244 | * first execution finishes. | ||
245 | * | ||
246 | * The item is pinned by this function as it retains a reference to it, managed | ||
247 | * through the item operations. The item is unpinned once it has been | ||
248 | * executed. | ||
249 | * | ||
250 | * An item may hog the thread that is running it for a relatively large amount | ||
251 | * of time, sufficient, for example, to perform several lookup, mkdir, create | ||
252 | * and setxattr operations. It may sleep on I/O and may sleep to obtain locks. | ||
253 | * | ||
254 | * Conversely, if a number of items are awaiting processing, it may take some | ||
255 | * time before any given item is given attention. The number of threads in the | ||
256 | * pool may be increased to deal with demand, but only up to a limit. | ||
257 | * | ||
258 | * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in | ||
259 | * the very slow queue, from which only a portion of the threads will be | ||
260 | * allowed to pick items to execute. This ensures that very slow items won't | ||
261 | * overly block ones that are just ordinarily slow. | ||
262 | * | ||
263 | * Returns 0 if successful, -EAGAIN if not. | ||
264 | */ | ||
265 | int slow_work_enqueue(struct slow_work *work) | ||
266 | { | ||
267 | unsigned long flags; | ||
268 | |||
269 | BUG_ON(slow_work_user_count <= 0); | ||
270 | BUG_ON(!work); | ||
271 | BUG_ON(!work->ops); | ||
272 | BUG_ON(!work->ops->get_ref); | ||
273 | |||
274 | /* when honouring an enqueue request, we only promise that we will run | ||
275 | * the work function in the future; we do not promise to run it once | ||
276 | * per enqueue request | ||
277 | * | ||
278 | * we use the PENDING bit to merge together repeat requests without | ||
279 | * having to disable IRQs and take the spinlock, whilst still | ||
280 | * maintaining our promise | ||
281 | */ | ||
282 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | ||
283 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
284 | |||
285 | /* we promise that we will not attempt to execute the work | ||
286 | * function in more than one thread simultaneously | ||
287 | * | ||
288 | * this, however, leaves us with a problem if we're asked to | ||
289 | * enqueue the work whilst someone is executing the work | ||
290 | * function as simply queueing the work immediately means that | ||
291 | * another thread may try executing it whilst it is already | ||
292 | * under execution | ||
293 | * | ||
294 | * to deal with this, we set the ENQ_DEFERRED bit instead of | ||
295 | * enqueueing, and the thread currently executing the work | ||
296 | * function will enqueue the work item when the work function | ||
297 | * returns and it has cleared the EXECUTING bit | ||
298 | */ | ||
299 | if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { | ||
300 | set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); | ||
301 | } else { | ||
302 | if (work->ops->get_ref(work) < 0) | ||
303 | goto cant_get_ref; | ||
304 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) | ||
305 | list_add_tail(&work->link, &vslow_work_queue); | ||
306 | else | ||
307 | list_add_tail(&work->link, &slow_work_queue); | ||
308 | wake_up(&slow_work_thread_wq); | ||
309 | } | ||
310 | |||
311 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
312 | } | ||
313 | return 0; | ||
314 | |||
315 | cant_get_ref: | ||
316 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
317 | return -EAGAIN; | ||
318 | } | ||
319 | EXPORT_SYMBOL(slow_work_enqueue); | ||
320 | |||
321 | /* | ||
322 | * Worker thread culling algorithm | ||
323 | */ | ||
324 | static bool slow_work_cull_thread(void) | ||
325 | { | ||
326 | unsigned long flags; | ||
327 | bool do_cull = false; | ||
328 | |||
329 | spin_lock_irqsave(&slow_work_queue_lock, flags); | ||
330 | |||
331 | if (slow_work_cull) { | ||
332 | slow_work_cull = false; | ||
333 | |||
334 | if (list_empty(&slow_work_queue) && | ||
335 | list_empty(&vslow_work_queue) && | ||
336 | atomic_read(&slow_work_thread_count) > | ||
337 | slow_work_min_threads) { | ||
338 | mod_timer(&slow_work_cull_timer, | ||
339 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
340 | do_cull = true; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | ||
345 | return do_cull; | ||
346 | } | ||
347 | |||
348 | /* | ||
349 | * Determine if there is slow work available for dispatch | ||
350 | */ | ||
351 | static inline bool slow_work_available(int vsmax) | ||
352 | { | ||
353 | return !list_empty(&slow_work_queue) || | ||
354 | (!list_empty(&vslow_work_queue) && | ||
355 | atomic_read(&vslow_work_executing_count) < vsmax); | ||
356 | } | ||
357 | |||
358 | /* | ||
359 | * Worker thread dispatcher | ||
360 | */ | ||
361 | static int slow_work_thread(void *_data) | ||
362 | { | ||
363 | int vsmax; | ||
364 | |||
365 | DEFINE_WAIT(wait); | ||
366 | |||
367 | set_freezable(); | ||
368 | set_user_nice(current, -5); | ||
369 | |||
370 | for (;;) { | ||
371 | vsmax = vslow_work_proportion; | ||
372 | vsmax *= atomic_read(&slow_work_thread_count); | ||
373 | vsmax /= 100; | ||
374 | |||
375 | prepare_to_wait(&slow_work_thread_wq, &wait, | ||
376 | TASK_INTERRUPTIBLE); | ||
377 | if (!freezing(current) && | ||
378 | !slow_work_threads_should_exit && | ||
379 | !slow_work_available(vsmax) && | ||
380 | !slow_work_cull) | ||
381 | schedule(); | ||
382 | finish_wait(&slow_work_thread_wq, &wait); | ||
383 | |||
384 | try_to_freeze(); | ||
385 | |||
386 | vsmax = vslow_work_proportion; | ||
387 | vsmax *= atomic_read(&slow_work_thread_count); | ||
388 | vsmax /= 100; | ||
389 | |||
390 | if (slow_work_available(vsmax) && slow_work_execute()) { | ||
391 | cond_resched(); | ||
392 | if (list_empty(&slow_work_queue) && | ||
393 | list_empty(&vslow_work_queue) && | ||
394 | atomic_read(&slow_work_thread_count) > | ||
395 | slow_work_min_threads) | ||
396 | mod_timer(&slow_work_cull_timer, | ||
397 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
398 | continue; | ||
399 | } | ||
400 | |||
401 | if (slow_work_threads_should_exit) | ||
402 | break; | ||
403 | |||
404 | if (slow_work_cull && slow_work_cull_thread()) | ||
405 | break; | ||
406 | } | ||
407 | |||
408 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
409 | complete_and_exit(&slow_work_last_thread_exited, 0); | ||
410 | return 0; | ||
411 | } | ||
412 | |||
413 | /* | ||
414 | * Handle thread cull timer expiration | ||
415 | */ | ||
416 | static void slow_work_cull_timeout(unsigned long data) | ||
417 | { | ||
418 | slow_work_cull = true; | ||
419 | wake_up(&slow_work_thread_wq); | ||
420 | } | ||
421 | |||
/*
 * Get a reference on slow work thread starter
 *
 * Nothing is counted for the thread starter item, so this always reports
 * success (0).
 */
static int slow_work_new_thread_get_ref(struct slow_work *work)
{
	return 0;
}
429 | |||
/*
 * Drop a reference on slow work thread starter
 *
 * Counterpart of slow_work_new_thread_get_ref(); there is nothing to release.
 */
static void slow_work_new_thread_put_ref(struct slow_work *work)
{
}
436 | |||
437 | /* | ||
438 | * Start a new slow work thread | ||
439 | */ | ||
440 | static void slow_work_new_thread_execute(struct slow_work *work) | ||
441 | { | ||
442 | struct task_struct *p; | ||
443 | |||
444 | if (slow_work_threads_should_exit) | ||
445 | return; | ||
446 | |||
447 | if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads) | ||
448 | return; | ||
449 | |||
450 | if (!mutex_trylock(&slow_work_user_lock)) | ||
451 | return; | ||
452 | |||
453 | slow_work_may_not_start_new_thread = true; | ||
454 | atomic_inc(&slow_work_thread_count); | ||
455 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
456 | if (IS_ERR(p)) { | ||
457 | printk(KERN_DEBUG "Slow work thread pool: OOM\n"); | ||
458 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
459 | BUG(); /* we're running on a slow work thread... */ | ||
460 | mod_timer(&slow_work_oom_timer, | ||
461 | jiffies + SLOW_WORK_OOM_TIMEOUT); | ||
462 | } else { | ||
463 | /* ratelimit the starting of new threads */ | ||
464 | mod_timer(&slow_work_oom_timer, jiffies + 1); | ||
465 | } | ||
466 | |||
467 | mutex_unlock(&slow_work_user_lock); | ||
468 | } | ||
469 | |||
470 | static const struct slow_work_ops slow_work_new_thread_ops = { | ||
471 | .get_ref = slow_work_new_thread_get_ref, | ||
472 | .put_ref = slow_work_new_thread_put_ref, | ||
473 | .execute = slow_work_new_thread_execute, | ||
474 | }; | ||
475 | |||
476 | /* | ||
477 | * post-OOM new thread start suppression expiration | ||
478 | */ | ||
479 | static void slow_work_oom_timeout(unsigned long data) | ||
480 | { | ||
481 | slow_work_may_not_start_new_thread = false; | ||
482 | } | ||
483 | |||
484 | #ifdef CONFIG_SYSCTL | ||
485 | /* | ||
486 | * Handle adjustment of the minimum number of threads | ||
487 | */ | ||
488 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | ||
489 | struct file *filp, void __user *buffer, | ||
490 | size_t *lenp, loff_t *ppos) | ||
491 | { | ||
492 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
493 | int n; | ||
494 | |||
495 | if (ret == 0) { | ||
496 | mutex_lock(&slow_work_user_lock); | ||
497 | if (slow_work_user_count > 0) { | ||
498 | /* see if we need to start or stop threads */ | ||
499 | n = atomic_read(&slow_work_thread_count) - | ||
500 | slow_work_min_threads; | ||
501 | |||
502 | if (n < 0 && !slow_work_may_not_start_new_thread) | ||
503 | slow_work_enqueue(&slow_work_new_thread); | ||
504 | else if (n > 0) | ||
505 | mod_timer(&slow_work_cull_timer, | ||
506 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
507 | } | ||
508 | mutex_unlock(&slow_work_user_lock); | ||
509 | } | ||
510 | |||
511 | return ret; | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * Handle adjustment of the maximum number of threads | ||
516 | */ | ||
517 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, | ||
518 | struct file *filp, void __user *buffer, | ||
519 | size_t *lenp, loff_t *ppos) | ||
520 | { | ||
521 | int ret = proc_dointvec_minmax(table, write, filp, buffer, lenp, ppos); | ||
522 | int n; | ||
523 | |||
524 | if (ret == 0) { | ||
525 | mutex_lock(&slow_work_user_lock); | ||
526 | if (slow_work_user_count > 0) { | ||
527 | /* see if we need to stop threads */ | ||
528 | n = slow_work_max_threads - | ||
529 | atomic_read(&slow_work_thread_count); | ||
530 | |||
531 | if (n < 0) | ||
532 | mod_timer(&slow_work_cull_timer, | ||
533 | jiffies + SLOW_WORK_CULL_TIMEOUT); | ||
534 | } | ||
535 | mutex_unlock(&slow_work_user_lock); | ||
536 | } | ||
537 | |||
538 | return ret; | ||
539 | } | ||
540 | #endif /* CONFIG_SYSCTL */ | ||
541 | |||
542 | /** | ||
543 | * slow_work_register_user - Register a user of the facility | ||
544 | * | ||
545 | * Register a user of the facility, starting up the initial threads if there | ||
546 | * aren't any other users at this point. This will return 0 if successful, or | ||
547 | * an error if not. | ||
548 | */ | ||
549 | int slow_work_register_user(void) | ||
550 | { | ||
551 | struct task_struct *p; | ||
552 | int loop; | ||
553 | |||
554 | mutex_lock(&slow_work_user_lock); | ||
555 | |||
556 | if (slow_work_user_count == 0) { | ||
557 | printk(KERN_NOTICE "Slow work thread pool: Starting up\n"); | ||
558 | init_completion(&slow_work_last_thread_exited); | ||
559 | |||
560 | slow_work_threads_should_exit = false; | ||
561 | slow_work_init(&slow_work_new_thread, | ||
562 | &slow_work_new_thread_ops); | ||
563 | slow_work_may_not_start_new_thread = false; | ||
564 | slow_work_cull = false; | ||
565 | |||
566 | /* start the minimum number of threads */ | ||
567 | for (loop = 0; loop < slow_work_min_threads; loop++) { | ||
568 | atomic_inc(&slow_work_thread_count); | ||
569 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | ||
570 | if (IS_ERR(p)) | ||
571 | goto error; | ||
572 | } | ||
573 | printk(KERN_NOTICE "Slow work thread pool: Ready\n"); | ||
574 | } | ||
575 | |||
576 | slow_work_user_count++; | ||
577 | mutex_unlock(&slow_work_user_lock); | ||
578 | return 0; | ||
579 | |||
580 | error: | ||
581 | if (atomic_dec_and_test(&slow_work_thread_count)) | ||
582 | complete(&slow_work_last_thread_exited); | ||
583 | if (loop > 0) { | ||
584 | printk(KERN_ERR "Slow work thread pool:" | ||
585 | " Aborting startup on ENOMEM\n"); | ||
586 | slow_work_threads_should_exit = true; | ||
587 | wake_up_all(&slow_work_thread_wq); | ||
588 | wait_for_completion(&slow_work_last_thread_exited); | ||
589 | printk(KERN_ERR "Slow work thread pool: Aborted\n"); | ||
590 | } | ||
591 | mutex_unlock(&slow_work_user_lock); | ||
592 | return PTR_ERR(p); | ||
593 | } | ||
594 | EXPORT_SYMBOL(slow_work_register_user); | ||
595 | |||
596 | /** | ||
597 | * slow_work_unregister_user - Unregister a user of the facility | ||
598 | * | ||
599 | * Unregister a user of the facility, killing all the threads if this was the | ||
600 | * last one. | ||
601 | */ | ||
602 | void slow_work_unregister_user(void) | ||
603 | { | ||
604 | mutex_lock(&slow_work_user_lock); | ||
605 | |||
606 | BUG_ON(slow_work_user_count <= 0); | ||
607 | |||
608 | slow_work_user_count--; | ||
609 | if (slow_work_user_count == 0) { | ||
610 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); | ||
611 | slow_work_threads_should_exit = true; | ||
612 | wake_up_all(&slow_work_thread_wq); | ||
613 | wait_for_completion(&slow_work_last_thread_exited); | ||
614 | printk(KERN_NOTICE "Slow work thread pool:" | ||
615 | " Shut down complete\n"); | ||
616 | } | ||
617 | |||
618 | del_timer_sync(&slow_work_cull_timer); | ||
619 | |||
620 | mutex_unlock(&slow_work_user_lock); | ||
621 | } | ||
622 | EXPORT_SYMBOL(slow_work_unregister_user); | ||
623 | |||
624 | /* | ||
625 | * Initialise the slow work facility | ||
626 | */ | ||
627 | static int __init init_slow_work(void) | ||
628 | { | ||
629 | unsigned nr_cpus = num_possible_cpus(); | ||
630 | |||
631 | if (slow_work_max_threads < nr_cpus) | ||
632 | slow_work_max_threads = nr_cpus; | ||
633 | #ifdef CONFIG_SYSCTL | ||
634 | if (slow_work_max_max_threads < nr_cpus * 2) | ||
635 | slow_work_max_max_threads = nr_cpus * 2; | ||
636 | #endif | ||
637 | return 0; | ||
638 | } | ||
639 | |||
640 | subsys_initcall(init_slow_work); | ||
diff --git a/kernel/smp.c b/kernel/smp.c index bbedbb7efe32..858baac568ee 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
@@ -2,40 +2,82 @@ | |||
2 | * Generic helpers for smp ipi calls | 2 | * Generic helpers for smp ipi calls |
3 | * | 3 | * |
4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 | 4 | * (C) Jens Axboe <jens.axboe@oracle.com> 2008 |
5 | * | ||
6 | */ | 5 | */ |
7 | #include <linux/init.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/percpu.h> | ||
10 | #include <linux/rcupdate.h> | 6 | #include <linux/rcupdate.h> |
11 | #include <linux/rculist.h> | 7 | #include <linux/rculist.h> |
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/percpu.h> | ||
11 | #include <linux/init.h> | ||
12 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
13 | #include <linux/cpu.h> | ||
13 | 14 | ||
14 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); | 15 | static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); |
15 | static LIST_HEAD(call_function_queue); | 16 | |
16 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); | 17 | static struct { |
18 | struct list_head queue; | ||
19 | spinlock_t lock; | ||
20 | } call_function __cacheline_aligned_in_smp = | ||
21 | { | ||
22 | .queue = LIST_HEAD_INIT(call_function.queue), | ||
23 | .lock = __SPIN_LOCK_UNLOCKED(call_function.lock), | ||
24 | }; | ||
17 | 25 | ||
18 | enum { | 26 | enum { |
19 | CSD_FLAG_WAIT = 0x01, | 27 | CSD_FLAG_LOCK = 0x01, |
20 | CSD_FLAG_ALLOC = 0x02, | ||
21 | CSD_FLAG_LOCK = 0x04, | ||
22 | }; | 28 | }; |
23 | 29 | ||
24 | struct call_function_data { | 30 | struct call_function_data { |
25 | struct call_single_data csd; | 31 | struct call_single_data csd; |
26 | spinlock_t lock; | 32 | spinlock_t lock; |
27 | unsigned int refs; | 33 | unsigned int refs; |
28 | struct rcu_head rcu_head; | 34 | cpumask_var_t cpumask; |
29 | unsigned long cpumask_bits[]; | ||
30 | }; | 35 | }; |
31 | 36 | ||
32 | struct call_single_queue { | 37 | struct call_single_queue { |
33 | struct list_head list; | 38 | struct list_head list; |
34 | spinlock_t lock; | 39 | spinlock_t lock; |
40 | }; | ||
41 | |||
42 | static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { | ||
43 | .lock = __SPIN_LOCK_UNLOCKED(cfd_data.lock), | ||
44 | }; | ||
45 | |||
46 | static int | ||
47 | hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
48 | { | ||
49 | long cpu = (long)hcpu; | ||
50 | struct call_function_data *cfd = &per_cpu(cfd_data, cpu); | ||
51 | |||
52 | switch (action) { | ||
53 | case CPU_UP_PREPARE: | ||
54 | case CPU_UP_PREPARE_FROZEN: | ||
55 | if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, | ||
56 | cpu_to_node(cpu))) | ||
57 | return NOTIFY_BAD; | ||
58 | break; | ||
59 | |||
60 | #ifdef CONFIG_CPU_HOTPLUG | ||
61 | case CPU_UP_CANCELED: | ||
62 | case CPU_UP_CANCELED_FROZEN: | ||
63 | |||
64 | case CPU_DEAD: | ||
65 | case CPU_DEAD_FROZEN: | ||
66 | free_cpumask_var(cfd->cpumask); | ||
67 | break; | ||
68 | #endif | ||
69 | }; | ||
70 | |||
71 | return NOTIFY_OK; | ||
72 | } | ||
73 | |||
74 | static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { | ||
75 | .notifier_call = hotplug_cfd, | ||
35 | }; | 76 | }; |
36 | 77 | ||
37 | static int __cpuinit init_call_single_data(void) | 78 | static int __cpuinit init_call_single_data(void) |
38 | { | 79 | { |
80 | void *cpu = (void *)(long)smp_processor_id(); | ||
39 | int i; | 81 | int i; |
40 | 82 | ||
41 | for_each_possible_cpu(i) { | 83 | for_each_possible_cpu(i) { |
@@ -44,29 +86,63 @@ static int __cpuinit init_call_single_data(void) | |||
44 | spin_lock_init(&q->lock); | 86 | spin_lock_init(&q->lock); |
45 | INIT_LIST_HEAD(&q->list); | 87 | INIT_LIST_HEAD(&q->list); |
46 | } | 88 | } |
89 | |||
90 | hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); | ||
91 | register_cpu_notifier(&hotplug_cfd_notifier); | ||
92 | |||
47 | return 0; | 93 | return 0; |
48 | } | 94 | } |
49 | early_initcall(init_call_single_data); | 95 | early_initcall(init_call_single_data); |
50 | 96 | ||
51 | static void csd_flag_wait(struct call_single_data *data) | 97 | /* |
98 | * csd_lock/csd_unlock used to serialize access to per-cpu csd resources | ||
99 | * | ||
100 | * For non-synchronous ipi calls the csd can still be in use by the | ||
101 | * previous function call. For multi-cpu calls it's even more interesting | ||
102 | * as we'll have to ensure no other cpu is observing our csd. | ||
103 | */ | ||
104 | static void csd_lock_wait(struct call_single_data *data) | ||
52 | { | 105 | { |
53 | /* Wait for response */ | 106 | while (data->flags & CSD_FLAG_LOCK) |
54 | do { | ||
55 | if (!(data->flags & CSD_FLAG_WAIT)) | ||
56 | break; | ||
57 | cpu_relax(); | 107 | cpu_relax(); |
58 | } while (1); | 108 | } |
109 | |||
110 | static void csd_lock(struct call_single_data *data) | ||
111 | { | ||
112 | csd_lock_wait(data); | ||
113 | data->flags = CSD_FLAG_LOCK; | ||
114 | |||
115 | /* | ||
116 | * prevent CPU from reordering the above assignment | ||
117 | * to ->flags with any subsequent assignments to other | ||
118 | * fields of the specified call_single_data structure: | ||
119 | */ | ||
120 | smp_mb(); | ||
121 | } | ||
122 | |||
123 | static void csd_unlock(struct call_single_data *data) | ||
124 | { | ||
125 | WARN_ON(!(data->flags & CSD_FLAG_LOCK)); | ||
126 | |||
127 | /* | ||
128 | * ensure we're all done before releasing data: | ||
129 | */ | ||
130 | smp_mb(); | ||
131 | |||
132 | data->flags &= ~CSD_FLAG_LOCK; | ||
59 | } | 133 | } |
60 | 134 | ||
61 | /* | 135 | /* |
62 | * Insert a previously allocated call_single_data element for execution | 136 | * Insert a previously allocated call_single_data element |
63 | * on the given CPU. data must already have ->func, ->info, and ->flags set. | 137 | * for execution on the given CPU. data must already have |
138 | * ->func, ->info, and ->flags set. | ||
64 | */ | 139 | */ |
65 | static void generic_exec_single(int cpu, struct call_single_data *data) | 140 | static |
141 | void generic_exec_single(int cpu, struct call_single_data *data, int wait) | ||
66 | { | 142 | { |
67 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); | 143 | struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); |
68 | int wait = data->flags & CSD_FLAG_WAIT, ipi; | ||
69 | unsigned long flags; | 144 | unsigned long flags; |
145 | int ipi; | ||
70 | 146 | ||
71 | spin_lock_irqsave(&dst->lock, flags); | 147 | spin_lock_irqsave(&dst->lock, flags); |
72 | ipi = list_empty(&dst->list); | 148 | ipi = list_empty(&dst->list); |
@@ -74,24 +150,21 @@ static void generic_exec_single(int cpu, struct call_single_data *data) | |||
74 | spin_unlock_irqrestore(&dst->lock, flags); | 150 | spin_unlock_irqrestore(&dst->lock, flags); |
75 | 151 | ||
76 | /* | 152 | /* |
77 | * Make the list addition visible before sending the ipi. | 153 | * The list addition should be visible before sending the IPI |
154 | * handler locks the list to pull the entry off it because of | ||
155 | * normal cache coherency rules implied by spinlocks. | ||
156 | * | ||
157 | * If IPIs can go out of order to the cache coherency protocol | ||
158 | * in an architecture, sufficient synchronisation should be added | ||
159 | * to arch code to make it appear to obey cache coherency WRT | ||
160 | * locking and barrier primitives. Generic code isn't really | ||
161 | * equipped to do the right thing... | ||
78 | */ | 162 | */ |
79 | smp_mb(); | ||
80 | |||
81 | if (ipi) | 163 | if (ipi) |
82 | arch_send_call_function_single_ipi(cpu); | 164 | arch_send_call_function_single_ipi(cpu); |
83 | 165 | ||
84 | if (wait) | 166 | if (wait) |
85 | csd_flag_wait(data); | 167 | csd_lock_wait(data); |
86 | } | ||
87 | |||
88 | static void rcu_free_call_data(struct rcu_head *head) | ||
89 | { | ||
90 | struct call_function_data *data; | ||
91 | |||
92 | data = container_of(head, struct call_function_data, rcu_head); | ||
93 | |||
94 | kfree(data); | ||
95 | } | 168 | } |
96 | 169 | ||
97 | /* | 170 | /* |
@@ -104,99 +177,83 @@ void generic_smp_call_function_interrupt(void) | |||
104 | int cpu = get_cpu(); | 177 | int cpu = get_cpu(); |
105 | 178 | ||
106 | /* | 179 | /* |
107 | * It's ok to use list_for_each_rcu() here even though we may delete | 180 | * Ensure entry is visible on call_function_queue after we have |
108 | * 'pos', since list_del_rcu() doesn't clear ->next | 181 | * entered the IPI. See comment in smp_call_function_many. |
182 | * If we don't have this, then we may miss an entry on the list | ||
183 | * and never get another IPI to process it. | ||
184 | */ | ||
185 | smp_mb(); | ||
186 | |||
187 | /* | ||
188 | * It's ok to use list_for_each_rcu() here even though we may | ||
189 | * delete 'pos', since list_del_rcu() doesn't clear ->next | ||
109 | */ | 190 | */ |
110 | rcu_read_lock(); | 191 | list_for_each_entry_rcu(data, &call_function.queue, csd.list) { |
111 | list_for_each_entry_rcu(data, &call_function_queue, csd.list) { | ||
112 | int refs; | 192 | int refs; |
113 | 193 | ||
114 | if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) | 194 | spin_lock(&data->lock); |
195 | if (!cpumask_test_cpu(cpu, data->cpumask)) { | ||
196 | spin_unlock(&data->lock); | ||
115 | continue; | 197 | continue; |
198 | } | ||
199 | cpumask_clear_cpu(cpu, data->cpumask); | ||
200 | spin_unlock(&data->lock); | ||
116 | 201 | ||
117 | data->csd.func(data->csd.info); | 202 | data->csd.func(data->csd.info); |
118 | 203 | ||
119 | spin_lock(&data->lock); | 204 | spin_lock(&data->lock); |
120 | cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits)); | ||
121 | WARN_ON(data->refs == 0); | 205 | WARN_ON(data->refs == 0); |
122 | data->refs--; | 206 | refs = --data->refs; |
123 | refs = data->refs; | 207 | if (!refs) { |
208 | spin_lock(&call_function.lock); | ||
209 | list_del_rcu(&data->csd.list); | ||
210 | spin_unlock(&call_function.lock); | ||
211 | } | ||
124 | spin_unlock(&data->lock); | 212 | spin_unlock(&data->lock); |
125 | 213 | ||
126 | if (refs) | 214 | if (refs) |
127 | continue; | 215 | continue; |
128 | 216 | ||
129 | spin_lock(&call_function_lock); | 217 | csd_unlock(&data->csd); |
130 | list_del_rcu(&data->csd.list); | ||
131 | spin_unlock(&call_function_lock); | ||
132 | |||
133 | if (data->csd.flags & CSD_FLAG_WAIT) { | ||
134 | /* | ||
135 | * serialize stores to data with the flag clear | ||
136 | * and wakeup | ||
137 | */ | ||
138 | smp_wmb(); | ||
139 | data->csd.flags &= ~CSD_FLAG_WAIT; | ||
140 | } | ||
141 | if (data->csd.flags & CSD_FLAG_ALLOC) | ||
142 | call_rcu(&data->rcu_head, rcu_free_call_data); | ||
143 | } | 218 | } |
144 | rcu_read_unlock(); | ||
145 | 219 | ||
146 | put_cpu(); | 220 | put_cpu(); |
147 | } | 221 | } |
148 | 222 | ||
149 | /* | 223 | /* |
150 | * Invoked by arch to handle an IPI for call function single. Must be called | 224 | * Invoked by arch to handle an IPI for call function single. Must be |
151 | * from the arch with interrupts disabled. | 225 | * called from the arch with interrupts disabled. |
152 | */ | 226 | */ |
153 | void generic_smp_call_function_single_interrupt(void) | 227 | void generic_smp_call_function_single_interrupt(void) |
154 | { | 228 | { |
155 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); | 229 | struct call_single_queue *q = &__get_cpu_var(call_single_queue); |
230 | unsigned int data_flags; | ||
156 | LIST_HEAD(list); | 231 | LIST_HEAD(list); |
157 | 232 | ||
158 | /* | 233 | spin_lock(&q->lock); |
159 | * Need to see other stores to list head for checking whether | 234 | list_replace_init(&q->list, &list); |
160 | * list is empty without holding q->lock | 235 | spin_unlock(&q->lock); |
161 | */ | 236 | |
162 | smp_read_barrier_depends(); | 237 | while (!list_empty(&list)) { |
163 | while (!list_empty(&q->list)) { | 238 | struct call_single_data *data; |
164 | unsigned int data_flags; | 239 | |
165 | 240 | data = list_entry(list.next, struct call_single_data, list); | |
166 | spin_lock(&q->lock); | 241 | list_del(&data->list); |
167 | list_replace_init(&q->list, &list); | 242 | |
168 | spin_unlock(&q->lock); | 243 | /* |
169 | 244 | * 'data' can be invalid after this call if flags == 0 | |
170 | while (!list_empty(&list)) { | 245 | * (when called through generic_exec_single()), |
171 | struct call_single_data *data; | 246 | * so save them away before making the call: |
172 | 247 | */ | |
173 | data = list_entry(list.next, struct call_single_data, | 248 | data_flags = data->flags; |
174 | list); | 249 | |
175 | list_del(&data->list); | 250 | data->func(data->info); |
176 | 251 | ||
177 | /* | ||
178 | * 'data' can be invalid after this call if | ||
179 | * flags == 0 (when called through | ||
180 | * generic_exec_single(), so save them away before | ||
181 | * making the call. | ||
182 | */ | ||
183 | data_flags = data->flags; | ||
184 | |||
185 | data->func(data->info); | ||
186 | |||
187 | if (data_flags & CSD_FLAG_WAIT) { | ||
188 | smp_wmb(); | ||
189 | data->flags &= ~CSD_FLAG_WAIT; | ||
190 | } else if (data_flags & CSD_FLAG_LOCK) { | ||
191 | smp_wmb(); | ||
192 | data->flags &= ~CSD_FLAG_LOCK; | ||
193 | } else if (data_flags & CSD_FLAG_ALLOC) | ||
194 | kfree(data); | ||
195 | } | ||
196 | /* | 252 | /* |
197 | * See comment on outer loop | 253 | * Unlocked CSDs are valid through generic_exec_single(): |
198 | */ | 254 | */ |
199 | smp_read_barrier_depends(); | 255 | if (data_flags & CSD_FLAG_LOCK) |
256 | csd_unlock(data); | ||
200 | } | 257 | } |
201 | } | 258 | } |
202 | 259 | ||
@@ -215,65 +272,45 @@ static DEFINE_PER_CPU(struct call_single_data, csd_data); | |||
215 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | 272 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, |
216 | int wait) | 273 | int wait) |
217 | { | 274 | { |
218 | struct call_single_data d; | 275 | struct call_single_data d = { |
276 | .flags = 0, | ||
277 | }; | ||
219 | unsigned long flags; | 278 | unsigned long flags; |
220 | /* prevent preemption and reschedule on another processor, | 279 | int this_cpu; |
221 | as well as CPU removal */ | ||
222 | int me = get_cpu(); | ||
223 | int err = 0; | 280 | int err = 0; |
224 | 281 | ||
282 | /* | ||
283 | * prevent preemption and reschedule on another processor, | ||
284 | * as well as CPU removal | ||
285 | */ | ||
286 | this_cpu = get_cpu(); | ||
287 | |||
225 | /* Can deadlock when called with interrupts disabled */ | 288 | /* Can deadlock when called with interrupts disabled */ |
226 | WARN_ON(irqs_disabled()); | 289 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
227 | 290 | ||
228 | if (cpu == me) { | 291 | if (cpu == this_cpu) { |
229 | local_irq_save(flags); | 292 | local_irq_save(flags); |
230 | func(info); | 293 | func(info); |
231 | local_irq_restore(flags); | 294 | local_irq_restore(flags); |
232 | } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { | 295 | } else { |
233 | struct call_single_data *data; | 296 | if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { |
297 | struct call_single_data *data = &d; | ||
298 | |||
299 | if (!wait) | ||
300 | data = &__get_cpu_var(csd_data); | ||
234 | 301 | ||
235 | if (!wait) { | 302 | csd_lock(data); |
236 | /* | 303 | |
237 | * We are calling a function on a single CPU | 304 | data->func = func; |
238 | * and we are not going to wait for it to finish. | 305 | data->info = info; |
239 | * We first try to allocate the data, but if we | 306 | generic_exec_single(cpu, data, wait); |
240 | * fail, we fall back to use a per cpu data to pass | ||
241 | * the information to that CPU. Since all callers | ||
242 | * of this code will use the same data, we must | ||
243 | * synchronize the callers to prevent a new caller | ||
244 | * from corrupting the data before the callee | ||
245 | * can access it. | ||
246 | * | ||
247 | * The CSD_FLAG_LOCK is used to let us know when | ||
248 | * the IPI handler is done with the data. | ||
249 | * The first caller will set it, and the callee | ||
250 | * will clear it. The next caller must wait for | ||
251 | * it to clear before we set it again. This | ||
252 | * will make sure the callee is done with the | ||
253 | * data before a new caller will use it. | ||
254 | */ | ||
255 | data = kmalloc(sizeof(*data), GFP_ATOMIC); | ||
256 | if (data) | ||
257 | data->flags = CSD_FLAG_ALLOC; | ||
258 | else { | ||
259 | data = &per_cpu(csd_data, me); | ||
260 | while (data->flags & CSD_FLAG_LOCK) | ||
261 | cpu_relax(); | ||
262 | data->flags = CSD_FLAG_LOCK; | ||
263 | } | ||
264 | } else { | 307 | } else { |
265 | data = &d; | 308 | err = -ENXIO; /* CPU not online */ |
266 | data->flags = CSD_FLAG_WAIT; | ||
267 | } | 309 | } |
268 | |||
269 | data->func = func; | ||
270 | data->info = info; | ||
271 | generic_exec_single(cpu, data); | ||
272 | } else { | ||
273 | err = -ENXIO; /* CPU not online */ | ||
274 | } | 310 | } |
275 | 311 | ||
276 | put_cpu(); | 312 | put_cpu(); |
313 | |||
277 | return err; | 314 | return err; |
278 | } | 315 | } |
279 | EXPORT_SYMBOL(smp_call_function_single); | 316 | EXPORT_SYMBOL(smp_call_function_single); |
@@ -283,23 +320,26 @@ EXPORT_SYMBOL(smp_call_function_single); | |||
283 | * @cpu: The CPU to run on. | 320 | * @cpu: The CPU to run on. |
284 | * @data: Pre-allocated and setup data structure | 321 | * @data: Pre-allocated and setup data structure |
285 | * | 322 | * |
286 | * Like smp_call_function_single(), but allow caller to pass in a pre-allocated | 323 | * Like smp_call_function_single(), but allow caller to pass in a |
287 | * data structure. Useful for embedding @data inside other structures, for | 324 | * pre-allocated data structure. Useful for embedding @data inside |
288 | * instance. | 325 | * other structures, for instance. |
289 | * | ||
290 | */ | 326 | */ |
291 | void __smp_call_function_single(int cpu, struct call_single_data *data) | 327 | void __smp_call_function_single(int cpu, struct call_single_data *data, |
328 | int wait) | ||
292 | { | 329 | { |
330 | csd_lock(data); | ||
331 | |||
293 | /* Can deadlock when called with interrupts disabled */ | 332 | /* Can deadlock when called with interrupts disabled */ |
294 | WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled()); | 333 | WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); |
295 | 334 | ||
296 | generic_exec_single(cpu, data); | 335 | generic_exec_single(cpu, data, wait); |
297 | } | 336 | } |
298 | 337 | ||
299 | /* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ | 338 | /* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ |
339 | |||
300 | #ifndef arch_send_call_function_ipi_mask | 340 | #ifndef arch_send_call_function_ipi_mask |
301 | #define arch_send_call_function_ipi_mask(maskp) \ | 341 | # define arch_send_call_function_ipi_mask(maskp) \ |
302 | arch_send_call_function_ipi(*(maskp)) | 342 | arch_send_call_function_ipi(*(maskp)) |
303 | #endif | 343 | #endif |
304 | 344 | ||
305 | /** | 345 | /** |
@@ -307,7 +347,8 @@ void __smp_call_function_single(int cpu, struct call_single_data *data) | |||
307 | * @mask: The set of cpus to run on (only runs on online subset). | 347 | * @mask: The set of cpus to run on (only runs on online subset). |
308 | * @func: The function to run. This must be fast and non-blocking. | 348 | * @func: The function to run. This must be fast and non-blocking. |
309 | * @info: An arbitrary pointer to pass to the function. | 349 | * @info: An arbitrary pointer to pass to the function. |
310 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 350 | * @wait: If true, wait (atomically) until function has completed |
351 | * on other CPUs. | ||
311 | * | 352 | * |
312 | * If @wait is true, then returns once @func has returned. Note that @wait | 353 | * If @wait is true, then returns once @func has returned. Note that @wait |
313 | * will be implicitly turned on in case of allocation failures, since | 354 | * will be implicitly turned on in case of allocation failures, since |
@@ -318,27 +359,27 @@ void __smp_call_function_single(int cpu, struct call_single_data *data) | |||
318 | * must be disabled when calling this function. | 359 | * must be disabled when calling this function. |
319 | */ | 360 | */ |
320 | void smp_call_function_many(const struct cpumask *mask, | 361 | void smp_call_function_many(const struct cpumask *mask, |
321 | void (*func)(void *), void *info, | 362 | void (*func)(void *), void *info, bool wait) |
322 | bool wait) | ||
323 | { | 363 | { |
324 | struct call_function_data *data; | 364 | struct call_function_data *data; |
325 | unsigned long flags; | 365 | unsigned long flags; |
326 | int cpu, next_cpu; | 366 | int cpu, next_cpu, this_cpu = smp_processor_id(); |
327 | 367 | ||
328 | /* Can deadlock when called with interrupts disabled */ | 368 | /* Can deadlock when called with interrupts disabled */ |
329 | WARN_ON(irqs_disabled()); | 369 | WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); |
330 | 370 | ||
331 | /* So, what's a CPU they want? Ignoring this one. */ | 371 | /* So, what's a CPU they want? Ignoring this one. */ |
332 | cpu = cpumask_first_and(mask, cpu_online_mask); | 372 | cpu = cpumask_first_and(mask, cpu_online_mask); |
333 | if (cpu == smp_processor_id()) | 373 | if (cpu == this_cpu) |
334 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 374 | cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
375 | |||
335 | /* No online cpus? We're done. */ | 376 | /* No online cpus? We're done. */ |
336 | if (cpu >= nr_cpu_ids) | 377 | if (cpu >= nr_cpu_ids) |
337 | return; | 378 | return; |
338 | 379 | ||
339 | /* Do we have another CPU which isn't us? */ | 380 | /* Do we have another CPU which isn't us? */ |
340 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); | 381 | next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); |
341 | if (next_cpu == smp_processor_id()) | 382 | if (next_cpu == this_cpu) |
342 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); | 383 | next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); |
343 | 384 | ||
344 | /* Fastpath: do that cpu by itself. */ | 385 | /* Fastpath: do that cpu by itself. */ |
@@ -347,43 +388,40 @@ void smp_call_function_many(const struct cpumask *mask, | |||
347 | return; | 388 | return; |
348 | } | 389 | } |
349 | 390 | ||
350 | data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); | 391 | data = &__get_cpu_var(cfd_data); |
351 | if (unlikely(!data)) { | 392 | csd_lock(&data->csd); |
352 | /* Slow path. */ | ||
353 | for_each_online_cpu(cpu) { | ||
354 | if (cpu == smp_processor_id()) | ||
355 | continue; | ||
356 | if (cpumask_test_cpu(cpu, mask)) | ||
357 | smp_call_function_single(cpu, func, info, wait); | ||
358 | } | ||
359 | return; | ||
360 | } | ||
361 | 393 | ||
362 | spin_lock_init(&data->lock); | 394 | spin_lock_irqsave(&data->lock, flags); |
363 | data->csd.flags = CSD_FLAG_ALLOC; | ||
364 | if (wait) | ||
365 | data->csd.flags |= CSD_FLAG_WAIT; | ||
366 | data->csd.func = func; | 395 | data->csd.func = func; |
367 | data->csd.info = info; | 396 | data->csd.info = info; |
368 | cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); | 397 | cpumask_and(data->cpumask, mask, cpu_online_mask); |
369 | cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); | 398 | cpumask_clear_cpu(this_cpu, data->cpumask); |
370 | data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); | 399 | data->refs = cpumask_weight(data->cpumask); |
371 | 400 | ||
372 | spin_lock_irqsave(&call_function_lock, flags); | 401 | spin_lock(&call_function.lock); |
373 | list_add_tail_rcu(&data->csd.list, &call_function_queue); | 402 | /* |
374 | spin_unlock_irqrestore(&call_function_lock, flags); | 403 | * Place entry at the _HEAD_ of the list, so that any cpu still |
404 | * observing the entry in generic_smp_call_function_interrupt() | ||
405 | * will not miss any other list entries: | ||
406 | */ | ||
407 | list_add_rcu(&data->csd.list, &call_function.queue); | ||
408 | spin_unlock(&call_function.lock); | ||
409 | |||
410 | spin_unlock_irqrestore(&data->lock, flags); | ||
375 | 411 | ||
376 | /* | 412 | /* |
377 | * Make the list addition visible before sending the ipi. | 413 | * Make the list addition visible before sending the ipi. |
414 | * (IPIs must obey or appear to obey normal Linux cache | ||
415 | * coherency rules -- see comment in generic_exec_single). | ||
378 | */ | 416 | */ |
379 | smp_mb(); | 417 | smp_mb(); |
380 | 418 | ||
381 | /* Send a message to all CPUs in the map */ | 419 | /* Send a message to all CPUs in the map */ |
382 | arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits)); | 420 | arch_send_call_function_ipi_mask(data->cpumask); |
383 | 421 | ||
384 | /* optionally wait for the CPUs to complete */ | 422 | /* Optionally wait for the CPUs to complete */ |
385 | if (wait) | 423 | if (wait) |
386 | csd_flag_wait(&data->csd); | 424 | csd_lock_wait(&data->csd); |
387 | } | 425 | } |
388 | EXPORT_SYMBOL(smp_call_function_many); | 426 | EXPORT_SYMBOL(smp_call_function_many); |
389 | 427 | ||
@@ -391,7 +429,8 @@ EXPORT_SYMBOL(smp_call_function_many); | |||
391 | * smp_call_function(): Run a function on all other CPUs. | 429 | * smp_call_function(): Run a function on all other CPUs. |
392 | * @func: The function to run. This must be fast and non-blocking. | 430 | * @func: The function to run. This must be fast and non-blocking. |
393 | * @info: An arbitrary pointer to pass to the function. | 431 | * @info: An arbitrary pointer to pass to the function. |
394 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 432 | * @wait: If true, wait (atomically) until function has completed |
433 | * on other CPUs. | ||
395 | * | 434 | * |
396 | * Returns 0. | 435 | * Returns 0. |
397 | * | 436 | * |
@@ -407,26 +446,27 @@ int smp_call_function(void (*func)(void *), void *info, int wait) | |||
407 | preempt_disable(); | 446 | preempt_disable(); |
408 | smp_call_function_many(cpu_online_mask, func, info, wait); | 447 | smp_call_function_many(cpu_online_mask, func, info, wait); |
409 | preempt_enable(); | 448 | preempt_enable(); |
449 | |||
410 | return 0; | 450 | return 0; |
411 | } | 451 | } |
412 | EXPORT_SYMBOL(smp_call_function); | 452 | EXPORT_SYMBOL(smp_call_function); |
413 | 453 | ||
414 | void ipi_call_lock(void) | 454 | void ipi_call_lock(void) |
415 | { | 455 | { |
416 | spin_lock(&call_function_lock); | 456 | spin_lock(&call_function.lock); |
417 | } | 457 | } |
418 | 458 | ||
419 | void ipi_call_unlock(void) | 459 | void ipi_call_unlock(void) |
420 | { | 460 | { |
421 | spin_unlock(&call_function_lock); | 461 | spin_unlock(&call_function.lock); |
422 | } | 462 | } |
423 | 463 | ||
424 | void ipi_call_lock_irq(void) | 464 | void ipi_call_lock_irq(void) |
425 | { | 465 | { |
426 | spin_lock_irq(&call_function_lock); | 466 | spin_lock_irq(&call_function.lock); |
427 | } | 467 | } |
428 | 468 | ||
429 | void ipi_call_unlock_irq(void) | 469 | void ipi_call_unlock_irq(void) |
430 | { | 470 | { |
431 | spin_unlock_irq(&call_function_lock); | 471 | spin_unlock_irq(&call_function.lock); |
432 | } | 472 | } |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 57d3f67f6f38..ea23ec087ee9 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
@@ -180,7 +180,7 @@ asmlinkage void __do_softirq(void) | |||
180 | account_system_vtime(current); | 180 | account_system_vtime(current); |
181 | 181 | ||
182 | __local_bh_disable((unsigned long)__builtin_return_address(0)); | 182 | __local_bh_disable((unsigned long)__builtin_return_address(0)); |
183 | trace_softirq_enter(); | 183 | lockdep_softirq_enter(); |
184 | 184 | ||
185 | cpu = smp_processor_id(); | 185 | cpu = smp_processor_id(); |
186 | restart: | 186 | restart: |
@@ -220,7 +220,7 @@ restart: | |||
220 | if (pending) | 220 | if (pending) |
221 | wakeup_softirqd(); | 221 | wakeup_softirqd(); |
222 | 222 | ||
223 | trace_softirq_exit(); | 223 | lockdep_softirq_exit(); |
224 | 224 | ||
225 | account_system_vtime(current); | 225 | account_system_vtime(current); |
226 | _local_bh_enable(); | 226 | _local_bh_enable(); |
@@ -496,7 +496,7 @@ static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softir | |||
496 | cp->flags = 0; | 496 | cp->flags = 0; |
497 | cp->priv = softirq; | 497 | cp->priv = softirq; |
498 | 498 | ||
499 | __smp_call_function_single(cpu, cp); | 499 | __smp_call_function_single(cpu, cp, 0); |
500 | return 0; | 500 | return 0; |
501 | } | 501 | } |
502 | return 1; | 502 | return 1; |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5ec4543dfc06..82350f8f04f6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <linux/acpi.h> | 48 | #include <linux/acpi.h> |
49 | #include <linux/reboot.h> | 49 | #include <linux/reboot.h> |
50 | #include <linux/ftrace.h> | 50 | #include <linux/ftrace.h> |
51 | #include <linux/slow-work.h> | ||
51 | 52 | ||
52 | #include <asm/uaccess.h> | 53 | #include <asm/uaccess.h> |
53 | #include <asm/processor.h> | 54 | #include <asm/processor.h> |
@@ -897,6 +898,14 @@ static struct ctl_table kern_table[] = { | |||
897 | .proc_handler = &scan_unevictable_handler, | 898 | .proc_handler = &scan_unevictable_handler, |
898 | }, | 899 | }, |
899 | #endif | 900 | #endif |
901 | #ifdef CONFIG_SLOW_WORK | ||
902 | { | ||
903 | .ctl_name = CTL_UNNUMBERED, | ||
904 | .procname = "slow-work", | ||
905 | .mode = 0555, | ||
906 | .child = slow_work_sysctls, | ||
907 | }, | ||
908 | #endif | ||
900 | /* | 909 | /* |
901 | * NOTE: do not add new entries to this table unless you have read | 910 | * NOTE: do not add new entries to this table unless you have read |
902 | * Documentation/sysctl/ctl_unnumbered.txt | 911 | * Documentation/sysctl/ctl_unnumbered.txt |
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 34e707e5ab87..504086ab4443 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig | |||
@@ -72,11 +72,10 @@ config FUNCTION_GRAPH_TRACER | |||
72 | help | 72 | help |
73 | Enable the kernel to trace a function at both its return | 73 | Enable the kernel to trace a function at both its return |
74 | and its entry. | 74 | and its entry. |
75 | It's first purpose is to trace the duration of functions and | 75 | Its first purpose is to trace the duration of functions and |
76 | draw a call graph for each thread with some informations like | 76 | draw a call graph for each thread with some information like |
77 | the return value. | 77 | the return value. This is done by setting the current return |
78 | This is done by setting the current return address on the current | 78 | address on the current task structure into a stack of calls. |
79 | task structure into a stack of calls. | ||
80 | 79 | ||
81 | config IRQSOFF_TRACER | 80 | config IRQSOFF_TRACER |
82 | bool "Interrupts-off Latency Tracer" | 81 | bool "Interrupts-off Latency Tracer" |
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fdf913dfc7e8..53e8c8bc0c98 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c | |||
@@ -1908,7 +1908,7 @@ int register_ftrace_function(struct ftrace_ops *ops) | |||
1908 | } | 1908 | } |
1909 | 1909 | ||
1910 | /** | 1910 | /** |
1911 | * unregister_ftrace_function - unresgister a function for profiling. | 1911 | * unregister_ftrace_function - unregister a function for profiling. |
1912 | * @ops - ops structure that holds the function to unregister | 1912 | * @ops - ops structure that holds the function to unregister |
1913 | * | 1913 | * |
1914 | * Unregister a function that was added to be called by ftrace profiling. | 1914 | * Unregister a function that was added to be called by ftrace profiling. |