about | summary | refs | log | tree | commit | diff | stats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/Makefile 1
-rw-r--r-- kernel/backtracetest.c 65
-rw-r--r-- kernel/hrtimer.c 9
-rw-r--r-- kernel/irq/manage.c 33
-rw-r--r-- kernel/irq/proc.c 59
-rw-r--r-- kernel/pid.c 1
-rw-r--r-- kernel/posix-cpu-timers.c 3
-rw-r--r-- kernel/profile.c 6
-rw-r--r-- kernel/rcuclassic.c 34
-rw-r--r-- kernel/rcupdate.c 71
-rw-r--r-- kernel/rcupreempt.c 418
-rw-r--r-- kernel/rcupreempt_trace.c 1
-rw-r--r-- kernel/rcutorture.c 174
-rw-r--r-- kernel/smp.c 383
-rw-r--r-- kernel/softirq.c 4
-rw-r--r-- kernel/stacktrace.c 14
-rw-r--r-- kernel/sysctl.c 13
-rw-r--r-- kernel/time/tick-broadcast.c 2
-rw-r--r-- kernel/time/tick-sched.c 8
19 files changed, 1182 insertions, 117 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index f6328e16dfdd..0a7ed838984b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
39obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 39obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
40obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o 40obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
41obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o 41obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
42obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
42obj-$(CONFIG_SMP) += spinlock.o 43obj-$(CONFIG_SMP) += spinlock.o
43obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 44obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
44obj-$(CONFIG_PROVE_LOCKING) += spinlock.o 45obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/backtracetest.c b/kernel/backtracetest.c
index d1a7605c5b8f..a5e026bc45c4 100644
--- a/kernel/backtracetest.c
+++ b/kernel/backtracetest.c
@@ -10,30 +10,73 @@
10 * of the License. 10 * of the License.
11 */ 11 */
12 12
13#include <linux/completion.h>
14#include <linux/delay.h>
15#include <linux/interrupt.h>
13#include <linux/module.h> 16#include <linux/module.h>
14#include <linux/sched.h> 17#include <linux/sched.h>
15#include <linux/delay.h> 18#include <linux/stacktrace.h>
19
20static void backtrace_test_normal(void)
21{
22 printk("Testing a backtrace from process context.\n");
23 printk("The following trace is a kernel self test and not a bug!\n");
16 24
17static struct timer_list backtrace_timer; 25 dump_stack();
26}
18 27
19static void backtrace_test_timer(unsigned long data) 28static DECLARE_COMPLETION(backtrace_work);
29
30static void backtrace_test_irq_callback(unsigned long data)
31{
32 dump_stack();
33 complete(&backtrace_work);
34}
35
36static DECLARE_TASKLET(backtrace_tasklet, &backtrace_test_irq_callback, 0);
37
38static void backtrace_test_irq(void)
20{ 39{
21 printk("Testing a backtrace from irq context.\n"); 40 printk("Testing a backtrace from irq context.\n");
22 printk("The following trace is a kernel self test and not a bug!\n"); 41 printk("The following trace is a kernel self test and not a bug!\n");
23 dump_stack(); 42
43 init_completion(&backtrace_work);
44 tasklet_schedule(&backtrace_tasklet);
45 wait_for_completion(&backtrace_work);
46}
47
48#ifdef CONFIG_STACKTRACE
49static void backtrace_test_saved(void)
50{
51 struct stack_trace trace;
52 unsigned long entries[8];
53
54 printk("Testing a saved backtrace.\n");
55 printk("The following trace is a kernel self test and not a bug!\n");
56
57 trace.nr_entries = 0;
58 trace.max_entries = ARRAY_SIZE(entries);
59 trace.entries = entries;
60 trace.skip = 0;
61
62 save_stack_trace(&trace);
63 print_stack_trace(&trace, 0);
64}
65#else
66static void backtrace_test_saved(void)
67{
68 printk("Saved backtrace test skipped.\n");
24} 69}
70#endif
71
25static int backtrace_regression_test(void) 72static int backtrace_regression_test(void)
26{ 73{
27 printk("====[ backtrace testing ]===========\n"); 74 printk("====[ backtrace testing ]===========\n");
28 printk("Testing a backtrace from process context.\n");
29 printk("The following trace is a kernel self test and not a bug!\n");
30 dump_stack();
31 75
32 init_timer(&backtrace_timer); 76 backtrace_test_normal();
33 backtrace_timer.function = backtrace_test_timer; 77 backtrace_test_irq();
34 mod_timer(&backtrace_timer, jiffies + 10); 78 backtrace_test_saved();
35 79
36 msleep(10);
37 printk("====[ end of backtrace testing ]====\n"); 80 printk("====[ end of backtrace testing ]====\n");
38 return 0; 81 return 0;
39} 82}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 27a83ee41443..b8e4dce80a74 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -300,11 +300,10 @@ EXPORT_SYMBOL_GPL(ktime_sub_ns);
300 */ 300 */
301u64 ktime_divns(const ktime_t kt, s64 div) 301u64 ktime_divns(const ktime_t kt, s64 div)
302{ 302{
303 u64 dclc, inc, dns; 303 u64 dclc;
304 int sft = 0; 304 int sft = 0;
305 305
306 dclc = dns = ktime_to_ns(kt); 306 dclc = ktime_to_ns(kt);
307 inc = div;
308 /* Make sure the divisor is less than 2^32: */ 307 /* Make sure the divisor is less than 2^32: */
309 while (div >> 32) { 308 while (div >> 32) {
310 sft++; 309 sft++;
@@ -623,7 +622,7 @@ static void retrigger_next_event(void *arg)
623void clock_was_set(void) 622void clock_was_set(void)
624{ 623{
625 /* Retrigger the CPU local events everywhere */ 624 /* Retrigger the CPU local events everywhere */
626 on_each_cpu(retrigger_next_event, NULL, 0, 1); 625 on_each_cpu(retrigger_next_event, NULL, 1);
627} 626}
628 627
629/* 628/*
@@ -632,8 +631,6 @@ void clock_was_set(void)
632 */ 631 */
633void hres_timers_resume(void) 632void hres_timers_resume(void)
634{ 633{
635 WARN_ON_ONCE(num_online_cpus() > 1);
636
637 /* Retrigger the CPU local events: */ 634 /* Retrigger the CPU local events: */
638 retrigger_next_event(NULL); 635 retrigger_next_event(NULL);
639} 636}
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 46d6611a33bb..77a51be36010 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -17,6 +17,8 @@
17 17
18#ifdef CONFIG_SMP 18#ifdef CONFIG_SMP
19 19
20cpumask_t irq_default_affinity = CPU_MASK_ALL;
21
20/** 22/**
21 * synchronize_irq - wait for pending IRQ handlers (on other CPUs) 23 * synchronize_irq - wait for pending IRQ handlers (on other CPUs)
22 * @irq: interrupt number to wait for 24 * @irq: interrupt number to wait for
@@ -95,6 +97,27 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
95 return 0; 97 return 0;
96} 98}
97 99
100#ifndef CONFIG_AUTO_IRQ_AFFINITY
101/*
102 * Generic version of the affinity autoselector.
103 */
104int irq_select_affinity(unsigned int irq)
105{
106 cpumask_t mask;
107
108 if (!irq_can_set_affinity(irq))
109 return 0;
110
111 cpus_and(mask, cpu_online_map, irq_default_affinity);
112
113 irq_desc[irq].affinity = mask;
114 irq_desc[irq].chip->set_affinity(irq, mask);
115
116 set_balance_irq_affinity(irq, mask);
117 return 0;
118}
119#endif
120
98#endif 121#endif
99 122
100/** 123/**
@@ -354,7 +377,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
354 377
355 /* Setup the type (level, edge polarity) if configured: */ 378 /* Setup the type (level, edge polarity) if configured: */
356 if (new->flags & IRQF_TRIGGER_MASK) { 379 if (new->flags & IRQF_TRIGGER_MASK) {
357 if (desc->chip && desc->chip->set_type) 380 if (desc->chip->set_type)
358 desc->chip->set_type(irq, 381 desc->chip->set_type(irq,
359 new->flags & IRQF_TRIGGER_MASK); 382 new->flags & IRQF_TRIGGER_MASK);
360 else 383 else
@@ -364,8 +387,7 @@ int setup_irq(unsigned int irq, struct irqaction *new)
364 */ 387 */
365 printk(KERN_WARNING "No IRQF_TRIGGER set_type " 388 printk(KERN_WARNING "No IRQF_TRIGGER set_type "
366 "function for IRQ %d (%s)\n", irq, 389 "function for IRQ %d (%s)\n", irq,
367 desc->chip ? desc->chip->name : 390 desc->chip->name);
368 "unknown");
369 } else 391 } else
370 compat_irq_chip_set_default_handler(desc); 392 compat_irq_chip_set_default_handler(desc);
371 393
@@ -382,6 +404,9 @@ int setup_irq(unsigned int irq, struct irqaction *new)
382 } else 404 } else
383 /* Undo nested disables: */ 405 /* Undo nested disables: */
384 desc->depth = 1; 406 desc->depth = 1;
407
408 /* Set default affinity mask once everything is setup */
409 irq_select_affinity(irq);
385 } 410 }
386 /* Reset broken irq detection when installing new handler */ 411 /* Reset broken irq detection when installing new handler */
387 desc->irq_count = 0; 412 desc->irq_count = 0;
@@ -571,8 +596,6 @@ int request_irq(unsigned int irq, irq_handler_t handler,
571 action->next = NULL; 596 action->next = NULL;
572 action->dev_id = dev_id; 597 action->dev_id = dev_id;
573 598
574 select_smp_affinity(irq);
575
576#ifdef CONFIG_DEBUG_SHIRQ 599#ifdef CONFIG_DEBUG_SHIRQ
577 if (irqflags & IRQF_SHARED) { 600 if (irqflags & IRQF_SHARED) {
578 /* 601 /*
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index c2f2ccb0549a..6c6d35d68ee9 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -44,7 +44,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
44 unsigned long count, void *data) 44 unsigned long count, void *data)
45{ 45{
46 unsigned int irq = (int)(long)data, full_count = count, err; 46 unsigned int irq = (int)(long)data, full_count = count, err;
47 cpumask_t new_value, tmp; 47 cpumask_t new_value;
48 48
49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity || 49 if (!irq_desc[irq].chip->set_affinity || no_irq_affinity ||
50 irq_balancing_disabled(irq)) 50 irq_balancing_disabled(irq))
@@ -62,17 +62,51 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
62 * way to make the system unusable accidentally :-) At least 62 * way to make the system unusable accidentally :-) At least
63 * one online CPU still has to be targeted. 63 * one online CPU still has to be targeted.
64 */ 64 */
65 cpus_and(tmp, new_value, cpu_online_map); 65 if (!cpus_intersects(new_value, cpu_online_map))
66 if (cpus_empty(tmp))
67 /* Special case for empty set - allow the architecture 66 /* Special case for empty set - allow the architecture
68 code to set default SMP affinity. */ 67 code to set default SMP affinity. */
69 return select_smp_affinity(irq) ? -EINVAL : full_count; 68 return irq_select_affinity(irq) ? -EINVAL : full_count;
70 69
71 irq_set_affinity(irq, new_value); 70 irq_set_affinity(irq, new_value);
72 71
73 return full_count; 72 return full_count;
74} 73}
75 74
75static int default_affinity_read(char *page, char **start, off_t off,
76 int count, int *eof, void *data)
77{
78 int len = cpumask_scnprintf(page, count, irq_default_affinity);
79 if (count - len < 2)
80 return -EINVAL;
81 len += sprintf(page + len, "\n");
82 return len;
83}
84
85static int default_affinity_write(struct file *file, const char __user *buffer,
86 unsigned long count, void *data)
87{
88 unsigned int full_count = count, err;
89 cpumask_t new_value;
90
91 err = cpumask_parse_user(buffer, count, new_value);
92 if (err)
93 return err;
94
95 if (!is_affinity_mask_valid(new_value))
96 return -EINVAL;
97
98 /*
99 * Do not allow disabling IRQs completely - it's a too easy
100 * way to make the system unusable accidentally :-) At least
101 * one online CPU still has to be targeted.
102 */
103 if (!cpus_intersects(new_value, cpu_online_map))
104 return -EINVAL;
105
106 irq_default_affinity = new_value;
107
108 return full_count;
109}
76#endif 110#endif
77 111
78static int irq_spurious_read(char *page, char **start, off_t off, 112static int irq_spurious_read(char *page, char **start, off_t off,
@@ -171,6 +205,21 @@ void unregister_handler_proc(unsigned int irq, struct irqaction *action)
171 remove_proc_entry(action->dir->name, irq_desc[irq].dir); 205 remove_proc_entry(action->dir->name, irq_desc[irq].dir);
172} 206}
173 207
208void register_default_affinity_proc(void)
209{
210#ifdef CONFIG_SMP
211 struct proc_dir_entry *entry;
212
213 /* create /proc/irq/default_smp_affinity */
214 entry = create_proc_entry("default_smp_affinity", 0600, root_irq_dir);
215 if (entry) {
216 entry->data = NULL;
217 entry->read_proc = default_affinity_read;
218 entry->write_proc = default_affinity_write;
219 }
220#endif
221}
222
174void init_irq_proc(void) 223void init_irq_proc(void)
175{ 224{
176 int i; 225 int i;
@@ -180,6 +229,8 @@ void init_irq_proc(void)
180 if (!root_irq_dir) 229 if (!root_irq_dir)
181 return; 230 return;
182 231
232 register_default_affinity_proc();
233
183 /* 234 /*
184 * Create entries for all existing IRQs. 235 * Create entries for all existing IRQs.
185 */ 236 */
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h>
33#include <linux/bootmem.h> 34#include <linux/bootmem.h>
34#include <linux/hash.h> 35#include <linux/hash.h>
35#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index f1525ad06cb3..c42a03aef36f 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -1037,6 +1037,9 @@ static void check_thread_timers(struct task_struct *tsk,
1037 sig->rlim[RLIMIT_RTTIME].rlim_cur += 1037 sig->rlim[RLIMIT_RTTIME].rlim_cur +=
1038 USEC_PER_SEC; 1038 USEC_PER_SEC;
1039 } 1039 }
1040 printk(KERN_INFO
1041 "RT Watchdog Timeout: %s[%d]\n",
1042 tsk->comm, task_pid_nr(tsk));
1040 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk); 1043 __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
1041 } 1044 }
1042 } 1045 }
diff --git a/kernel/profile.c b/kernel/profile.c
index ae7ead82cbc9..58926411eb2a 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -252,7 +252,7 @@ static void profile_flip_buffers(void)
252 mutex_lock(&profile_flip_mutex); 252 mutex_lock(&profile_flip_mutex);
253 j = per_cpu(cpu_profile_flip, get_cpu()); 253 j = per_cpu(cpu_profile_flip, get_cpu());
254 put_cpu(); 254 put_cpu();
255 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 255 on_each_cpu(__profile_flip_buffers, NULL, 1);
256 for_each_online_cpu(cpu) { 256 for_each_online_cpu(cpu) {
257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; 257 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
258 for (i = 0; i < NR_PROFILE_HIT; ++i) { 258 for (i = 0; i < NR_PROFILE_HIT; ++i) {
@@ -275,7 +275,7 @@ static void profile_discard_flip_buffers(void)
275 mutex_lock(&profile_flip_mutex); 275 mutex_lock(&profile_flip_mutex);
276 i = per_cpu(cpu_profile_flip, get_cpu()); 276 i = per_cpu(cpu_profile_flip, get_cpu());
277 put_cpu(); 277 put_cpu();
278 on_each_cpu(__profile_flip_buffers, NULL, 0, 1); 278 on_each_cpu(__profile_flip_buffers, NULL, 1);
279 for_each_online_cpu(cpu) { 279 for_each_online_cpu(cpu) {
280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; 280 struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); 281 memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
@@ -558,7 +558,7 @@ static int __init create_hash_tables(void)
558out_cleanup: 558out_cleanup:
559 prof_on = 0; 559 prof_on = 0;
560 smp_mb(); 560 smp_mb();
561 on_each_cpu(profile_nop, NULL, 0, 1); 561 on_each_cpu(profile_nop, NULL, 1);
562 for_each_online_cpu(cpu) { 562 for_each_online_cpu(cpu) {
563 struct page *page; 563 struct page *page;
564 564
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 65c0906080ef..16eeeaa9d618 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); 387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); 388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); 389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
390
391 local_irq_disable();
392 this_rdp->qlen += rdp->qlen;
393 local_irq_enable();
390} 394}
391 395
392static void rcu_offline_cpu(int cpu) 396static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
516 if (user || 520 if (user ||
517 (idle_cpu(cpu) && !in_softirq() && 521 (idle_cpu(cpu) && !in_softirq() &&
518 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 522 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
523
524 /*
525 * Get here if this CPU took its interrupt from user
526 * mode or from the idle loop, and if this is not a
527 * nested interrupt. In this case, the CPU is in
528 * a quiescent state, so count it.
529 *
530 * Also do a memory barrier. This is needed to handle
531 * the case where writes from a preempt-disable section
532 * of code get reordered into schedule() by this CPU's
533 * write buffer. The memory barrier makes sure that
534 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
535 * by other CPUs to happen after any such write.
536 */
537
538 smp_mb(); /* See above block comment. */
519 rcu_qsctr_inc(cpu); 539 rcu_qsctr_inc(cpu);
520 rcu_bh_qsctr_inc(cpu); 540 rcu_bh_qsctr_inc(cpu);
521 } else if (!in_softirq()) 541
542 } else if (!in_softirq()) {
543
544 /*
545 * Get here if this CPU did not take its interrupt from
546 * softirq, in other words, if it is not interrupting
547 * a rcu_bh read-side critical section. This is an _bh
548 * critical section, so count it. The memory barrier
549 * is needed for the same reason as is the above one.
550 */
551
552 smp_mb(); /* See above block comment. */
522 rcu_bh_qsctr_inc(cpu); 553 rcu_bh_qsctr_inc(cpu);
554 }
523 raise_rcu_softirq(); 555 raise_rcu_softirq();
524} 556}
525 557
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16c..f14f372cf6f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <asm/atomic.h> 40#include <asm/atomic.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/completion.h>
43#include <linux/percpu.h> 42#include <linux/percpu.h>
44#include <linux/notifier.h> 43#include <linux/notifier.h>
45#include <linux/cpu.h> 44#include <linux/cpu.h>
46#include <linux/mutex.h> 45#include <linux/mutex.h>
47#include <linux/module.h> 46#include <linux/module.h>
48 47
49struct rcu_synchronize { 48enum rcu_barrier {
50 struct rcu_head head; 49 RCU_BARRIER_STD,
51 struct completion completion; 50 RCU_BARRIER_BH,
51 RCU_BARRIER_SCHED,
52}; 52};
53 53
54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
60 * Awaken the corresponding synchronize_rcu() instance now that a 60 * Awaken the corresponding synchronize_rcu() instance now that a
61 * grace period has elapsed. 61 * grace period has elapsed.
62 */ 62 */
63static void wakeme_after_rcu(struct rcu_head *head) 63void wakeme_after_rcu(struct rcu_head *head)
64{ 64{
65 struct rcu_synchronize *rcu; 65 struct rcu_synchronize *rcu;
66 66
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void) 80synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81{
82 struct rcu_synchronize rcu;
83
84 init_completion(&rcu.completion);
85 /* Will wake me after RCU finished */
86 call_rcu(&rcu.head, wakeme_after_rcu);
87
88 /* Wait for it */
89 wait_for_completion(&rcu.completion);
90}
91EXPORT_SYMBOL_GPL(synchronize_rcu); 81EXPORT_SYMBOL_GPL(synchronize_rcu);
92 82
93static void rcu_barrier_callback(struct rcu_head *notused) 83static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
99/* 89/*
100 * Called with preemption disabled, and from cross-cpu IRQ context. 90 * Called with preemption disabled, and from cross-cpu IRQ context.
101 */ 91 */
102static void rcu_barrier_func(void *notused) 92static void rcu_barrier_func(void *type)
103{ 93{
104 int cpu = smp_processor_id(); 94 int cpu = smp_processor_id();
105 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 95 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
106 96
107 atomic_inc(&rcu_barrier_cpu_count); 97 atomic_inc(&rcu_barrier_cpu_count);
108 call_rcu(head, rcu_barrier_callback); 98 switch ((enum rcu_barrier)type) {
99 case RCU_BARRIER_STD:
100 call_rcu(head, rcu_barrier_callback);
101 break;
102 case RCU_BARRIER_BH:
103 call_rcu_bh(head, rcu_barrier_callback);
104 break;
105 case RCU_BARRIER_SCHED:
106 call_rcu_sched(head, rcu_barrier_callback);
107 break;
108 }
109} 109}
110 110
111/** 111/*
112 * rcu_barrier - Wait until all the in-flight RCUs are complete. 112 * Orchestrate the specified type of RCU barrier, waiting for all
113 * RCU callbacks of the specified type to complete.
113 */ 114 */
114void rcu_barrier(void) 115static void _rcu_barrier(enum rcu_barrier type)
115{ 116{
116 BUG_ON(in_interrupt()); 117 BUG_ON(in_interrupt());
117 /* Take cpucontrol mutex to protect against CPU hotplug */ 118 /* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
127 * until all the callbacks are queued. 128 * until all the callbacks are queued.
128 */ 129 */
129 rcu_read_lock(); 130 rcu_read_lock();
130 on_each_cpu(rcu_barrier_func, NULL, 0, 1); 131 on_each_cpu(rcu_barrier_func, (void *)type, 1);
131 rcu_read_unlock(); 132 rcu_read_unlock();
132 wait_for_completion(&rcu_barrier_completion); 133 wait_for_completion(&rcu_barrier_completion);
133 mutex_unlock(&rcu_barrier_mutex); 134 mutex_unlock(&rcu_barrier_mutex);
134} 135}
136
137/**
138 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
139 */
140void rcu_barrier(void)
141{
142 _rcu_barrier(RCU_BARRIER_STD);
143}
135EXPORT_SYMBOL_GPL(rcu_barrier); 144EXPORT_SYMBOL_GPL(rcu_barrier);
136 145
146/**
147 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
148 */
149void rcu_barrier_bh(void)
150{
151 _rcu_barrier(RCU_BARRIER_BH);
152}
153EXPORT_SYMBOL_GPL(rcu_barrier_bh);
154
155/**
156 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
157 */
158void rcu_barrier_sched(void)
159{
160 _rcu_barrier(RCU_BARRIER_SCHED);
161}
162EXPORT_SYMBOL_GPL(rcu_barrier_sched);
163
137void __init rcu_init(void) 164void __init rcu_init(void)
138{ 165{
139 __rcu_init(); 166 __rcu_init();
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.rcu_sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
411 } 434 }
412} 435}
413 436
414#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
415 440
416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
418static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
419 443
420/** 444/**
421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
422 * 446 *
423 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
424 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
425 * CPU is active. 449 * CPU is active.
426 */ 450 */
427void rcu_irq_enter(void) 451void rcu_irq_enter(void)
428{ 452{
429 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
430 455
431 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
432 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
433 458
434 /* 459 /*
435 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
436 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
437 */ 462 */
438 if (!in_interrupt() && 463 if (!in_interrupt() &&
439 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
440 /* 465 /*
441 * The following might seem like we could have a race 466 * The following might seem like we could have a race
442 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
459 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
460 * have already completed. 485 * have already completed.
461 */ 486 */
462 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
463 /* 488 /*
464 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
465 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
466 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
467 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
468 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
469 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
470 * grace-period state machine. 495 * grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
472 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
473 /* 498 /*
474 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
475 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
476 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
477 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
478 */ 503 */
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
480 /* 505 /*
481 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
482 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
483 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
484 * this IRQ to do. 509 * this IRQ to do.
485 */ 510 */
486 } 511 }
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
490 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
491 * 516 *
492 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
493 * dynticks_progress_counter to let the RCU handling be 518 * rcu_dyntick_sched.dynticks to let the RCU handling be
494 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
495 */ 520 */
496void rcu_irq_exit(void) 521void rcu_irq_exit(void)
497{ 522{
498 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
499 525
500 /* 526 /*
501 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
503 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
504 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
505 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
506 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
507 * it on exit. 533 * it on exit.
508 */ 534 */
509 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
515 541
516 /* 542 /*
517 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
518 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
519 */ 545 */
520 546
521 /* 547 /*
522 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
523 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
524 * are seen by other CPUs to precede the following 550 * are seen by other CPUs to precede the following
525 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
526 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
527 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
528 * state machine. 554 * state machine.
529 */ 555 */
530 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
531 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
532 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
533 } 559 }
534} 560}
535 561
536static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
537{ 563{
538 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
539 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
540} 567}
541 568
542static inline int 569static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
544{ 571{
545 long curr; 572 long curr;
546 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
547 575
548 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
549 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
550 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
551 579
552 /* 580 /*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
567 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
568 */ 596 */
569 597
570 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
571 return 0; 599 return 0;
572 600
573 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
580{ 608{
581 long curr; 609 long curr;
582 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
583 612
584 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
585 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
586 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
587 616
588 /* 617 /*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
609 return 1; 638 return 1;
610} 639}
611 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
612#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
613 685
614# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
615# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
616# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
617 692
618#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
619 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
620/* 721/*
621 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
622 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
819 unsigned long flags; 920 unsigned long flags;
820 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
821 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
926 * this CPU has to have exited all prior preempt-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
822 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
823 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
824 rcu_try_flip(); 945 rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
869 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
870 unsigned long flags; 991 unsigned long flags;
871 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
872 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
873 996
874 /* 997 /*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
882 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
883 list, tail); 1006 list, tail);
884 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
885 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
886 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
887 1015
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
916 * fix. 1044 * fix.
917 */ 1045 */
918 1046
919 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
920 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
921 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
922 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
923 if (list) 1051 if (list)
924 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
925 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
926} 1057}
927 1058
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
936void __cpuinit rcu_online_cpu(int cpu) 1067void __cpuinit rcu_online_cpu(int cpu)
937{ 1068{
938 unsigned long flags; 1069 unsigned long flags;
1070 struct rcu_data *rdp;
939 1071
940 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1072 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
941 cpu_set(cpu, rcu_cpu_online_map); 1073 cpu_set(cpu, rcu_cpu_online_map);
942 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1074 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1075
1076 /*
1077 * The rcu_sched grace-period processing might have bypassed
1078 * this CPU, given that it was not in the rcu_cpu_online_map
1079 * when the grace-period scan started. This means that the
1080 * grace-period task might sleep. So make sure that if this
1081 * should happen, the first callback posted to this CPU will
1082 * wake up the grace-period task if need be.
1083 */
1084
1085 rdp = RCU_DATA_CPU(cpu);
1086 spin_lock_irqsave(&rdp->lock, flags);
1087 rdp->rcu_sched_sleeping = 1;
1088 spin_unlock_irqrestore(&rdp->lock, flags);
943} 1089}
944 1090
945static void rcu_process_callbacks(struct softirq_action *unused) 1091static void rcu_process_callbacks(struct softirq_action *unused)
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
982 *rdp->nexttail = head; 1128 *rdp->nexttail = head;
983 rdp->nexttail = &head->next; 1129 rdp->nexttail = &head->next;
984 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1130 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
985 spin_unlock(&rdp->lock); 1131 spin_unlock_irqrestore(&rdp->lock, flags);
986 local_irq_restore(flags);
987} 1132}
988EXPORT_SYMBOL_GPL(call_rcu); 1133EXPORT_SYMBOL_GPL(call_rcu);
989 1134
1135void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1136{
1137 unsigned long flags;
1138 struct rcu_data *rdp;
1139 int wake_gp = 0;
1140
1141 head->func = func;
1142 head->next = NULL;
1143 local_irq_save(flags);
1144 rdp = RCU_DATA_ME();
1145 spin_lock(&rdp->lock);
1146 *rdp->nextschedtail = head;
1147 rdp->nextschedtail = &head->next;
1148 if (rdp->rcu_sched_sleeping) {
1149
1150 /* Grace-period processing might be sleeping... */
1151
1152 rdp->rcu_sched_sleeping = 0;
1153 wake_gp = 1;
1154 }
1155 spin_unlock_irqrestore(&rdp->lock, flags);
1156 if (wake_gp) {
1157
1158 /* Wake up grace-period processing, unless someone beat us. */
1159
1160 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1161 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1162 wake_gp = 0;
1163 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1164 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1165 if (wake_gp)
1166 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1167 }
1168}
1169EXPORT_SYMBOL_GPL(call_rcu_sched);
1170
990/* 1171/*
991 * Wait until all currently running preempt_disable() code segments 1172 * Wait until all currently running preempt_disable() code segments
992 * (including hardware-irq-disable segments) complete. Note that 1173 * (including hardware-irq-disable segments) complete. Note that
993 * in -rt this does -not- necessarily result in all currently executing 1174 * in -rt this does -not- necessarily result in all currently executing
994 * interrupt -handlers- having completed. 1175 * interrupt -handlers- having completed.
995 */ 1176 */
996void __synchronize_sched(void) 1177synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1178EXPORT_SYMBOL_GPL(__synchronize_sched);
1179
1180/*
1181 * kthread function that manages call_rcu_sched grace periods.
1182 */
1183static int rcu_sched_grace_period(void *arg)
997{ 1184{
998 cpumask_t oldmask; 1185 int couldsleep; /* might sleep after current pass. */
1186 int couldsleepnext = 0; /* might sleep after next pass. */
999 int cpu; 1187 int cpu;
1188 unsigned long flags;
1189 struct rcu_data *rdp;
1190 int ret;
1000 1191
1001 if (sched_getaffinity(0, &oldmask) < 0) 1192 /*
1002 oldmask = cpu_possible_map; 1193 * Each pass through the following loop handles one
1003 for_each_online_cpu(cpu) { 1194 * rcu_sched grace period cycle.
1004 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1195 */
1005 schedule(); 1196 do {
1006 } 1197 /* Save each CPU's current state. */
1007 sched_setaffinity(0, &oldmask); 1198
1199 for_each_online_cpu(cpu) {
1200 dyntick_save_progress_counter_sched(cpu);
1201 save_qsctr_sched(cpu);
1202 }
1203
1204 /*
1205 * Sleep for about an RCU grace-period's worth to
1206 * allow better batching and to consume less CPU.
1207 */
1208 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1209
1210 /*
1211 * If there was nothing to do last time, prepare to
1212 * sleep at the end of the current grace period cycle.
1213 */
1214 couldsleep = couldsleepnext;
1215 couldsleepnext = 1;
1216 if (couldsleep) {
1217 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1218 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1219 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1220 }
1221
1222 /*
1223 * Wait on each CPU in turn to have either visited
1224 * a quiescent state or been in dynticks-idle mode.
1225 */
1226 for_each_online_cpu(cpu) {
1227 while (rcu_qsctr_inc_needed(cpu) &&
1228 rcu_qsctr_inc_needed_dyntick(cpu)) {
1229 /* resched_cpu(cpu); @@@ */
1230 schedule_timeout_interruptible(1);
1231 }
1232 }
1233
1234 /* Advance callbacks for each CPU. */
1235
1236 for_each_online_cpu(cpu) {
1237
1238 rdp = RCU_DATA_CPU(cpu);
1239 spin_lock_irqsave(&rdp->lock, flags);
1240
1241 /*
1242 * We are running on this CPU irq-disabled, so no
1243 * CPU can go offline until we re-enable irqs.
1244 * The current CPU might have already gone
1245 * offline (between the for_each_online_cpu and
1246 * the spin_lock_irqsave), but in that case all its
1247 * callback lists will be empty, so no harm done.
1248 *
1249 * Advance the callbacks! We share normal RCU's
1250 * donelist, since callbacks are invoked the
1251 * same way in either case.
1252 */
1253 if (rdp->waitschedlist != NULL) {
1254 *rdp->donetail = rdp->waitschedlist;
1255 rdp->donetail = rdp->waitschedtail;
1256
1257 /*
1258 * Next rcu_check_callbacks() will
1259 * do the required raise_softirq().
1260 */
1261 }
1262 if (rdp->nextschedlist != NULL) {
1263 rdp->waitschedlist = rdp->nextschedlist;
1264 rdp->waitschedtail = rdp->nextschedtail;
1265 couldsleep = 0;
1266 couldsleepnext = 0;
1267 } else {
1268 rdp->waitschedlist = NULL;
1269 rdp->waitschedtail = &rdp->waitschedlist;
1270 }
1271 rdp->nextschedlist = NULL;
1272 rdp->nextschedtail = &rdp->nextschedlist;
1273
1274 /* Mark sleep intention. */
1275
1276 rdp->rcu_sched_sleeping = couldsleep;
1277
1278 spin_unlock_irqrestore(&rdp->lock, flags);
1279 }
1280
1281 /* If we saw callbacks on the last scan, go deal with them. */
1282
1283 if (!couldsleep)
1284 continue;
1285
1286 /* Attempt to block... */
1287
1288 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1289 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1290
1291 /*
1292 * Someone posted a callback after we scanned.
1293 * Go take care of it.
1294 */
1295 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1296 couldsleepnext = 0;
1297 continue;
1298 }
1299
1300 /* Block until the next person posts a callback. */
1301
1302 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1303 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1304 ret = 0;
1305 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1306 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1307 ret);
1308
1309 /*
1310 * Signals would prevent us from sleeping, and we cannot
1311 * do much with them in any case. So flush them.
1312 */
1313 if (ret)
1314 flush_signals(current);
1315 couldsleepnext = 0;
1316
1317 } while (!kthread_should_stop());
1318
1319 return (0);
1008} 1320}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010 1321
1011/* 1322/*
1012 * Check to see if any future RCU-related work will need to be done 1323 * Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
1023 1334
1024 return (rdp->donelist != NULL || 1335 return (rdp->donelist != NULL ||
1025 !!rdp->waitlistcount || 1336 !!rdp->waitlistcount ||
1026 rdp->nextlist != NULL); 1337 rdp->nextlist != NULL ||
1338 rdp->nextschedlist != NULL ||
1339 rdp->waitschedlist != NULL);
1027} 1340}
1028 1341
1029int rcu_pending(int cpu) 1342int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
1034 1347
1035 if (rdp->donelist != NULL || 1348 if (rdp->donelist != NULL ||
1036 !!rdp->waitlistcount || 1349 !!rdp->waitlistcount ||
1037 rdp->nextlist != NULL) 1350 rdp->nextlist != NULL ||
1351 rdp->nextschedlist != NULL ||
1352 rdp->waitschedlist != NULL)
1038 return 1; 1353 return 1;
1039 1354
1040 /* The RCU core needs an acknowledgement from this CPU. */ 1355 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
1101 rdp->donetail = &rdp->donelist; 1416 rdp->donetail = &rdp->donelist;
1102 rdp->rcu_flipctr[0] = 0; 1417 rdp->rcu_flipctr[0] = 0;
1103 rdp->rcu_flipctr[1] = 0; 1418 rdp->rcu_flipctr[1] = 0;
1419 rdp->nextschedlist = NULL;
1420 rdp->nextschedtail = &rdp->nextschedlist;
1421 rdp->waitschedlist = NULL;
1422 rdp->waitschedtail = &rdp->waitschedlist;
1423 rdp->rcu_sched_sleeping = 0;
1104 } 1424 }
1105 register_cpu_notifier(&rcu_nb); 1425 register_cpu_notifier(&rcu_nb);
1106 1426
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
1123} 1443}
1124 1444
1125/* 1445/*
1126 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1446 * Late-boot-time RCU initialization that must wait until after scheduler
1447 * has been initialized.
1127 */ 1448 */
1128void synchronize_kernel(void) 1449void __init rcu_init_sched(void)
1129{ 1450{
1130 synchronize_rcu(); 1451 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1452 NULL,
1453 "rcu_sched_grace_period");
1454 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1131} 1455}
1132 1456
1133#ifdef CONFIG_RCU_TRACE 1457#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
38#include <linux/moduleparam.h> 38#include <linux/moduleparam.h>
39#include <linux/percpu.h> 39#include <linux/percpu.h>
40#include <linux/notifier.h> 40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h> 41#include <linux/cpu.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h> 43#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..90b5b123f7a1 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
57 /* Defaults to "only at end of test". */ 57 /* Defaults to "only at end of test". */
58static int verbose; /* Print more debug info. */ 58static int verbose; /* Print more debug info. */
59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 60static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
61static int stutter = 5; /* Start/stop testing interval (in sec) */
62static int irqreader = 1; /* RCU readers from irq (timers). */
61static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 63static char *torture_type = "rcu"; /* What RCU implementation to torture. */
62 64
63module_param(nreaders, int, 0444); 65module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 74MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
73module_param(shuffle_interval, int, 0444); 75module_param(shuffle_interval, int, 0444);
74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 76MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
77module_param(stutter, int, 0444);
78MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
79module_param(irqreader, int, 0444);
80MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
75module_param(torture_type, charp, 0444); 81module_param(torture_type, charp, 0444);
76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 82MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
77 83
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
91static struct task_struct **reader_tasks; 97static struct task_struct **reader_tasks;
92static struct task_struct *stats_task; 98static struct task_struct *stats_task;
93static struct task_struct *shuffler_task; 99static struct task_struct *shuffler_task;
100static struct task_struct *stutter_task;
94 101
95#define RCU_TORTURE_PIPE_LEN 10 102#define RCU_TORTURE_PIPE_LEN 10
96 103
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
117static atomic_t n_rcu_torture_free; 124static atomic_t n_rcu_torture_free;
118static atomic_t n_rcu_torture_mberror; 125static atomic_t n_rcu_torture_mberror;
119static atomic_t n_rcu_torture_error; 126static atomic_t n_rcu_torture_error;
127static long n_rcu_torture_timers = 0;
120static struct list_head rcu_torture_removed; 128static struct list_head rcu_torture_removed;
121 129
130static int stutter_pause_test = 0;
131
132#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
133#define RCUTORTURE_RUNNABLE_INIT 1
134#else
135#define RCUTORTURE_RUNNABLE_INIT 0
136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138
122/* 139/*
123 * Allocate an element from the rcu_tortures pool. 140 * Allocate an element from the rcu_tortures pool.
124 */ 141 */
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
179 return swahw32(rrsp->rrs_state); 196 return swahw32(rrsp->rrs_state);
180} 197}
181 198
199static void
200rcu_stutter_wait(void)
201{
202 while (stutter_pause_test || !rcutorture_runnable)
203 if (rcutorture_runnable)
204 schedule_timeout_interruptible(1);
205 else
206 schedule_timeout_interruptible(round_jiffies_relative(HZ));
207}
208
182/* 209/*
183 * Operations vector for selecting different types of tests. 210 * Operations vector for selecting different types of tests.
184 */ 211 */
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
192 int (*completed)(void); 219 int (*completed)(void);
193 void (*deferredfree)(struct rcu_torture *p); 220 void (*deferredfree)(struct rcu_torture *p);
194 void (*sync)(void); 221 void (*sync)(void);
222 void (*cb_barrier)(void);
195 int (*stats)(char *page); 223 int (*stats)(char *page);
224 int irqcapable;
196 char *name; 225 char *name;
197}; 226};
198static struct rcu_torture_ops *cur_ops = NULL; 227static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
265 .completed = rcu_torture_completed, 294 .completed = rcu_torture_completed,
266 .deferredfree = rcu_torture_deferred_free, 295 .deferredfree = rcu_torture_deferred_free,
267 .sync = synchronize_rcu, 296 .sync = synchronize_rcu,
297 .cb_barrier = rcu_barrier,
268 .stats = NULL, 298 .stats = NULL,
299 .irqcapable = 1,
269 .name = "rcu" 300 .name = "rcu"
270}; 301};
271 302
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
304 .completed = rcu_torture_completed, 335 .completed = rcu_torture_completed,
305 .deferredfree = rcu_sync_torture_deferred_free, 336 .deferredfree = rcu_sync_torture_deferred_free,
306 .sync = synchronize_rcu, 337 .sync = synchronize_rcu,
338 .cb_barrier = NULL,
307 .stats = NULL, 339 .stats = NULL,
340 .irqcapable = 1,
308 .name = "rcu_sync" 341 .name = "rcu_sync"
309}; 342};
310 343
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
364 .completed = rcu_bh_torture_completed, 397 .completed = rcu_bh_torture_completed,
365 .deferredfree = rcu_bh_torture_deferred_free, 398 .deferredfree = rcu_bh_torture_deferred_free,
366 .sync = rcu_bh_torture_synchronize, 399 .sync = rcu_bh_torture_synchronize,
400 .cb_barrier = rcu_barrier_bh,
367 .stats = NULL, 401 .stats = NULL,
402 .irqcapable = 1,
368 .name = "rcu_bh" 403 .name = "rcu_bh"
369}; 404};
370 405
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
377 .completed = rcu_bh_torture_completed, 412 .completed = rcu_bh_torture_completed,
378 .deferredfree = rcu_sync_torture_deferred_free, 413 .deferredfree = rcu_sync_torture_deferred_free,
379 .sync = rcu_bh_torture_synchronize, 414 .sync = rcu_bh_torture_synchronize,
415 .cb_barrier = NULL,
380 .stats = NULL, 416 .stats = NULL,
417 .irqcapable = 1,
381 .name = "rcu_bh_sync" 418 .name = "rcu_bh_sync"
382}; 419};
383 420
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
458 .completed = srcu_torture_completed, 495 .completed = srcu_torture_completed,
459 .deferredfree = rcu_sync_torture_deferred_free, 496 .deferredfree = rcu_sync_torture_deferred_free,
460 .sync = srcu_torture_synchronize, 497 .sync = srcu_torture_synchronize,
498 .cb_barrier = NULL,
461 .stats = srcu_torture_stats, 499 .stats = srcu_torture_stats,
462 .name = "srcu" 500 .name = "srcu"
463}; 501};
@@ -482,6 +520,11 @@ static int sched_torture_completed(void)
482 return 0; 520 return 0;
483} 521}
484 522
523static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
524{
525 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
526}
527
485static void sched_torture_synchronize(void) 528static void sched_torture_synchronize(void)
486{ 529{
487 synchronize_sched(); 530 synchronize_sched();
@@ -494,12 +537,28 @@ static struct rcu_torture_ops sched_ops = {
494 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 537 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
495 .readunlock = sched_torture_read_unlock, 538 .readunlock = sched_torture_read_unlock,
496 .completed = sched_torture_completed, 539 .completed = sched_torture_completed,
497 .deferredfree = rcu_sync_torture_deferred_free, 540 .deferredfree = rcu_sched_torture_deferred_free,
498 .sync = sched_torture_synchronize, 541 .sync = sched_torture_synchronize,
542 .cb_barrier = rcu_barrier_sched,
499 .stats = NULL, 543 .stats = NULL,
544 .irqcapable = 1,
500 .name = "sched" 545 .name = "sched"
501}; 546};
502 547
548static struct rcu_torture_ops sched_ops_sync = {
549 .init = rcu_sync_torture_init,
550 .cleanup = NULL,
551 .readlock = sched_torture_read_lock,
552 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
553 .readunlock = sched_torture_read_unlock,
554 .completed = sched_torture_completed,
555 .deferredfree = rcu_sync_torture_deferred_free,
556 .sync = sched_torture_synchronize,
557 .cb_barrier = NULL,
558 .stats = NULL,
559 .name = "sched_sync"
560};
561
503/* 562/*
504 * RCU torture writer kthread. Repeatedly substitutes a new structure 563 * RCU torture writer kthread. Repeatedly substitutes a new structure
505 * for that pointed to by rcu_torture_current, freeing the old structure 564 * for that pointed to by rcu_torture_current, freeing the old structure
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
537 } 596 }
538 rcu_torture_current_version++; 597 rcu_torture_current_version++;
539 oldbatch = cur_ops->completed(); 598 oldbatch = cur_ops->completed();
599 rcu_stutter_wait();
540 } while (!kthread_should_stop() && !fullstop); 600 } while (!kthread_should_stop() && !fullstop);
541 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 601 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
542 while (!kthread_should_stop()) 602 while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
560 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 620 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
561 udelay(rcu_random(&rand) & 0x3ff); 621 udelay(rcu_random(&rand) & 0x3ff);
562 cur_ops->sync(); 622 cur_ops->sync();
623 rcu_stutter_wait();
563 } while (!kthread_should_stop() && !fullstop); 624 } while (!kthread_should_stop() && !fullstop);
564 625
565 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 626 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -569,6 +630,52 @@ rcu_torture_fakewriter(void *arg)
569} 630}
570 631
571/* 632/*
633 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
634 * incrementing the corresponding element of the pipeline array. The
635 * counter in the element should never be greater than 1, otherwise, the
636 * RCU implementation is broken.
637 */
638static void rcu_torture_timer(unsigned long unused)
639{
640 int idx;
641 int completed;
642 static DEFINE_RCU_RANDOM(rand);
643 static DEFINE_SPINLOCK(rand_lock);
644 struct rcu_torture *p;
645 int pipe_count;
646
647 idx = cur_ops->readlock();
648 completed = cur_ops->completed();
649 p = rcu_dereference(rcu_torture_current);
650 if (p == NULL) {
651 /* Leave because rcu_torture_writer is not yet underway */
652 cur_ops->readunlock(idx);
653 return;
654 }
655 if (p->rtort_mbtest == 0)
656 atomic_inc(&n_rcu_torture_mberror);
657 spin_lock(&rand_lock);
658 cur_ops->readdelay(&rand);
659 n_rcu_torture_timers++;
660 spin_unlock(&rand_lock);
661 preempt_disable();
662 pipe_count = p->rtort_pipe_count;
663 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
664 /* Should not happen, but... */
665 pipe_count = RCU_TORTURE_PIPE_LEN;
666 }
667 ++__get_cpu_var(rcu_torture_count)[pipe_count];
668 completed = cur_ops->completed() - completed;
669 if (completed > RCU_TORTURE_PIPE_LEN) {
670 /* Should not happen, but... */
671 completed = RCU_TORTURE_PIPE_LEN;
672 }
673 ++__get_cpu_var(rcu_torture_batch)[completed];
674 preempt_enable();
675 cur_ops->readunlock(idx);
676}
677
678/*
572 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, 679 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
573 * incrementing the corresponding element of the pipeline array. The 680 * incrementing the corresponding element of the pipeline array. The
574 * counter in the element should never be greater than 1, otherwise, the 681 * counter in the element should never be greater than 1, otherwise, the
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
582 DEFINE_RCU_RANDOM(rand); 689 DEFINE_RCU_RANDOM(rand);
583 struct rcu_torture *p; 690 struct rcu_torture *p;
584 int pipe_count; 691 int pipe_count;
692 struct timer_list t;
585 693
586 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 694 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
587 set_user_nice(current, 19); 695 set_user_nice(current, 19);
696 if (irqreader && cur_ops->irqcapable)
697 setup_timer_on_stack(&t, rcu_torture_timer, 0);
588 698
589 do { 699 do {
700 if (irqreader && cur_ops->irqcapable) {
701 if (!timer_pending(&t))
702 mod_timer(&t, 1);
703 }
590 idx = cur_ops->readlock(); 704 idx = cur_ops->readlock();
591 completed = cur_ops->completed(); 705 completed = cur_ops->completed();
592 p = rcu_dereference(rcu_torture_current); 706 p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
615 preempt_enable(); 729 preempt_enable();
616 cur_ops->readunlock(idx); 730 cur_ops->readunlock(idx);
617 schedule(); 731 schedule();
732 rcu_stutter_wait();
618 } while (!kthread_should_stop() && !fullstop); 733 } while (!kthread_should_stop() && !fullstop);
619 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 734 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
735 if (irqreader && cur_ops->irqcapable)
736 del_timer_sync(&t);
620 while (!kthread_should_stop()) 737 while (!kthread_should_stop())
621 schedule_timeout_uninterruptible(1); 738 schedule_timeout_uninterruptible(1);
622 return 0; 739 return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
647 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 764 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
648 cnt += sprintf(&page[cnt], 765 cnt += sprintf(&page[cnt],
649 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 766 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
650 "rtmbe: %d", 767 "rtmbe: %d nt: %ld",
651 rcu_torture_current, 768 rcu_torture_current,
652 rcu_torture_current_version, 769 rcu_torture_current_version,
653 list_empty(&rcu_torture_freelist), 770 list_empty(&rcu_torture_freelist),
654 atomic_read(&n_rcu_torture_alloc), 771 atomic_read(&n_rcu_torture_alloc),
655 atomic_read(&n_rcu_torture_alloc_fail), 772 atomic_read(&n_rcu_torture_alloc_fail),
656 atomic_read(&n_rcu_torture_free), 773 atomic_read(&n_rcu_torture_free),
657 atomic_read(&n_rcu_torture_mberror)); 774 atomic_read(&n_rcu_torture_mberror),
775 n_rcu_torture_timers);
658 if (atomic_read(&n_rcu_torture_mberror) != 0) 776 if (atomic_read(&n_rcu_torture_mberror) != 0)
659 cnt += sprintf(&page[cnt], " !!!"); 777 cnt += sprintf(&page[cnt], " !!!");
660 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 778 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
661 if (i > 1) { 779 if (i > 1) {
662 cnt += sprintf(&page[cnt], "!!! "); 780 cnt += sprintf(&page[cnt], "!!! ");
663 atomic_inc(&n_rcu_torture_error); 781 atomic_inc(&n_rcu_torture_error);
782 WARN_ON_ONCE(1);
664 } 783 }
665 cnt += sprintf(&page[cnt], "Reader Pipe: "); 784 cnt += sprintf(&page[cnt], "Reader Pipe: ");
666 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 785 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
785 return 0; 904 return 0;
786} 905}
787 906
907/* Cause the rcutorture test to "stutter", starting and stopping all
908 * threads periodically.
909 */
910static int
911rcu_torture_stutter(void *arg)
912{
913 VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
914 do {
915 schedule_timeout_interruptible(stutter * HZ);
916 stutter_pause_test = 1;
917 if (!kthread_should_stop())
918 schedule_timeout_interruptible(stutter * HZ);
919 stutter_pause_test = 0;
920 } while (!kthread_should_stop());
921 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
922 return 0;
923}
924
788static inline void 925static inline void
789rcu_torture_print_module_parms(char *tag) 926rcu_torture_print_module_parms(char *tag)
790{ 927{
791 printk(KERN_ALERT "%s" TORTURE_FLAG 928 printk(KERN_ALERT "%s" TORTURE_FLAG
792 "--- %s: nreaders=%d nfakewriters=%d " 929 "--- %s: nreaders=%d nfakewriters=%d "
793 "stat_interval=%d verbose=%d test_no_idle_hz=%d " 930 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
794 "shuffle_interval = %d\n", 931 "shuffle_interval=%d stutter=%d irqreader=%d\n",
795 torture_type, tag, nrealreaders, nfakewriters, 932 torture_type, tag, nrealreaders, nfakewriters,
796 stat_interval, verbose, test_no_idle_hz, shuffle_interval); 933 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
934 stutter, irqreader);
797} 935}
798 936
799static void 937static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
802 int i; 940 int i;
803 941
804 fullstop = 1; 942 fullstop = 1;
943 if (stutter_task) {
944 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
945 kthread_stop(stutter_task);
946 }
947 stutter_task = NULL;
805 if (shuffler_task) { 948 if (shuffler_task) {
806 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); 949 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
807 kthread_stop(shuffler_task); 950 kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
848 stats_task = NULL; 991 stats_task = NULL;
849 992
850 /* Wait for all RCU callbacks to fire. */ 993 /* Wait for all RCU callbacks to fire. */
851 rcu_barrier(); 994
995 if (cur_ops->cb_barrier != NULL)
996 cur_ops->cb_barrier();
852 997
853 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 998 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
854 999
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
868 int firsterr = 0; 1013 int firsterr = 0;
869 static struct rcu_torture_ops *torture_ops[] = 1014 static struct rcu_torture_ops *torture_ops[] =
870 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1015 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
871 &srcu_ops, &sched_ops, }; 1016 &srcu_ops, &sched_ops, &sched_ops_sync, };
872 1017
873 /* Process args and tell the world that the torturer is on the job. */ 1018 /* Process args and tell the world that the torturer is on the job. */
874 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1019 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
988 goto unwind; 1133 goto unwind;
989 } 1134 }
990 } 1135 }
1136 if (stutter < 0)
1137 stutter = 0;
1138 if (stutter) {
1139 /* Create the stutter thread */
1140 stutter_task = kthread_run(rcu_torture_stutter, NULL,
1141 "rcu_torture_stutter");
1142 if (IS_ERR(stutter_task)) {
1143 firsterr = PTR_ERR(stutter_task);
1144 VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
1145 stutter_task = NULL;
1146 goto unwind;
1147 }
1148 }
991 return 0; 1149 return 0;
992 1150
993unwind: 1151unwind:
diff --git a/kernel/smp.c b/kernel/smp.c
new file mode 100644
index 000000000000..462c785ca1ee
--- /dev/null
+++ b/kernel/smp.c
@@ -0,0 +1,383 @@
1/*
2 * Generic helpers for smp ipi calls
3 *
4 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
5 *
6 */
7#include <linux/init.h>
8#include <linux/module.h>
9#include <linux/percpu.h>
10#include <linux/rcupdate.h>
11#include <linux/rculist.h>
12#include <linux/smp.h>
13
14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
15static LIST_HEAD(call_function_queue);
16__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock);
17
18enum {
19 CSD_FLAG_WAIT = 0x01,
20 CSD_FLAG_ALLOC = 0x02,
21};
22
23struct call_function_data {
24 struct call_single_data csd;
25 spinlock_t lock;
26 unsigned int refs;
27 cpumask_t cpumask;
28 struct rcu_head rcu_head;
29};
30
31struct call_single_queue {
32 struct list_head list;
33 spinlock_t lock;
34};
35
36void __cpuinit init_call_single_data(void)
37{
38 int i;
39
40 for_each_possible_cpu(i) {
41 struct call_single_queue *q = &per_cpu(call_single_queue, i);
42
43 spin_lock_init(&q->lock);
44 INIT_LIST_HEAD(&q->list);
45 }
46}
47
48static void csd_flag_wait(struct call_single_data *data)
49{
50 /* Wait for response */
51 do {
52 /*
53 * We need to see the flags store in the IPI handler
54 */
55 smp_mb();
56 if (!(data->flags & CSD_FLAG_WAIT))
57 break;
58 cpu_relax();
59 } while (1);
60}
61
62/*
63 * Insert a previously allocated call_single_data element for execution
64 * on the given CPU. data must already have ->func, ->info, and ->flags set.
65 */
66static void generic_exec_single(int cpu, struct call_single_data *data)
67{
68 struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
69 int wait = data->flags & CSD_FLAG_WAIT, ipi;
70 unsigned long flags;
71
72 spin_lock_irqsave(&dst->lock, flags);
73 ipi = list_empty(&dst->list);
74 list_add_tail(&data->list, &dst->list);
75 spin_unlock_irqrestore(&dst->lock, flags);
76
77 if (ipi)
78 arch_send_call_function_single_ipi(cpu);
79
80 if (wait)
81 csd_flag_wait(data);
82}
83
84static void rcu_free_call_data(struct rcu_head *head)
85{
86 struct call_function_data *data;
87
88 data = container_of(head, struct call_function_data, rcu_head);
89
90 kfree(data);
91}
92
93/*
94 * Invoked by arch to handle an IPI for call function. Must be called with
95 * interrupts disabled.
96 */
97void generic_smp_call_function_interrupt(void)
98{
99 struct call_function_data *data;
100 int cpu = get_cpu();
101
102 /*
103 * It's ok to use list_for_each_entry_rcu() here even though we may delete
104 * 'data', since list_del_rcu() doesn't clear ->next
105 */
106 rcu_read_lock();
107 list_for_each_entry_rcu(data, &call_function_queue, csd.list) {
108 int refs;
109
110 if (!cpu_isset(cpu, data->cpumask))
111 continue;
112
113 data->csd.func(data->csd.info);
114
115 spin_lock(&data->lock);
116 cpu_clear(cpu, data->cpumask);
117 WARN_ON(data->refs == 0);
118 data->refs--;
119 refs = data->refs;
120 spin_unlock(&data->lock);
121
122 if (refs)
123 continue;
124
125 spin_lock(&call_function_lock);
126 list_del_rcu(&data->csd.list);
127 spin_unlock(&call_function_lock);
128
129 if (data->csd.flags & CSD_FLAG_WAIT) {
130 /*
131 * serialize stores to data with the flag clear
132 * and wakeup
133 */
134 smp_wmb();
135 data->csd.flags &= ~CSD_FLAG_WAIT;
136 } else
137 call_rcu(&data->rcu_head, rcu_free_call_data);
138 }
139 rcu_read_unlock();
140
141 put_cpu();
142}
143
144/*
145 * Invoked by arch to handle an IPI for call function single. Must be called
146 * from the arch with interrupts disabled.
147 */
148void generic_smp_call_function_single_interrupt(void)
149{
150 struct call_single_queue *q = &__get_cpu_var(call_single_queue);
151 LIST_HEAD(list);
152
153 /*
154 * Need to see other stores to list head for checking whether
155 * list is empty without holding q->lock
156 */
157 smp_mb();
158 while (!list_empty(&q->list)) {
159 unsigned int data_flags;
160
161 spin_lock(&q->lock);
162 list_replace_init(&q->list, &list);
163 spin_unlock(&q->lock);
164
165 while (!list_empty(&list)) {
166 struct call_single_data *data;
167
168 data = list_entry(list.next, struct call_single_data,
169 list);
170 list_del(&data->list);
171
172 /*
173 * 'data' can be invalid after this call if
174 * flags == 0 (when called through
175 * generic_exec_single(), so save them away before
176 * making the call.
177 */
178 data_flags = data->flags;
179
180 data->func(data->info);
181
182 if (data_flags & CSD_FLAG_WAIT) {
183 smp_wmb();
184 data->flags &= ~CSD_FLAG_WAIT;
185 } else if (data_flags & CSD_FLAG_ALLOC)
186 kfree(data);
187 }
188 /*
189 * See comment on outer loop
190 */
191 smp_mb();
192 }
193}
194
195/*
196 * smp_call_function_single - Run a function on a specific CPU
197 * @func: The function to run. This must be fast and non-blocking.
198 * @info: An arbitrary pointer to pass to the function.
199 * @wait: If true, wait until function has completed on other CPUs.
200 *
201 * Returns 0 on success, else a negative status code. Note that @wait
202 * will be implicitly turned on in case of allocation failures, since
203 * we fall back to on-stack allocation.
204 */
205int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
206 int wait)
207{
208 struct call_single_data d;
209 unsigned long flags;
210 /* prevent preemption and reschedule on another processor */
211 int me = get_cpu();
212
213 /* Can deadlock when called with interrupts disabled */
214 WARN_ON(irqs_disabled());
215
216 if (cpu == me) {
217 local_irq_save(flags);
218 func(info);
219 local_irq_restore(flags);
220 } else {
221 struct call_single_data *data = NULL;
222
223 if (!wait) {
224 data = kmalloc(sizeof(*data), GFP_ATOMIC);
225 if (data)
226 data->flags = CSD_FLAG_ALLOC;
227 }
228 if (!data) {
229 data = &d;
230 data->flags = CSD_FLAG_WAIT;
231 }
232
233 data->func = func;
234 data->info = info;
235 generic_exec_single(cpu, data);
236 }
237
238 put_cpu();
239 return 0;
240}
241EXPORT_SYMBOL(smp_call_function_single);
242
243/**
244 * __smp_call_function_single(): Run a function on another CPU
245 * @cpu: The CPU to run on.
246 * @data: Pre-allocated and setup data structure
247 *
248 * Like smp_call_function_single(), but allow caller to pass in a pre-allocated
249 * data structure. Useful for embedding @data inside other structures, for
250 * instance.
251 *
252 */
253void __smp_call_function_single(int cpu, struct call_single_data *data)
254{
255 /* Can deadlock when called with interrupts disabled */
256 WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled());
257
258 generic_exec_single(cpu, data);
259}
260
261/**
262 * smp_call_function_mask(): Run a function on a set of other CPUs.
263 * @mask: The set of cpus to run on.
264 * @func: The function to run. This must be fast and non-blocking.
265 * @info: An arbitrary pointer to pass to the function.
266 * @wait: If true, wait (atomically) until function has completed on other CPUs.
267 *
268 * Returns 0 on success, else a negative status code.
269 *
270 * If @wait is true, then returns once @func has returned. Note that @wait
271 * will be implicitly turned on in case of allocation failures, since
272 * we fall back to on-stack allocation.
273 *
274 * You must not call this function with disabled interrupts or from a
275 * hardware interrupt handler or from a bottom half handler. Preemption
276 * must be disabled when calling this function.
277 */
278int smp_call_function_mask(cpumask_t mask, void (*func)(void *), void *info,
279 int wait)
280{
281 struct call_function_data d;
282 struct call_function_data *data = NULL;
283 cpumask_t allbutself;
284 unsigned long flags;
285 int cpu, num_cpus;
286
287 /* Can deadlock when called with interrupts disabled */
288 WARN_ON(irqs_disabled());
289
290 cpu = smp_processor_id();
291 allbutself = cpu_online_map;
292 cpu_clear(cpu, allbutself);
293 cpus_and(mask, mask, allbutself);
294 num_cpus = cpus_weight(mask);
295
296 /*
297 * If zero CPUs, return. If just a single CPU, turn this request
298 * into a targeted single call instead since it's faster.
299 */
300 if (!num_cpus)
301 return 0;
302 else if (num_cpus == 1) {
303 cpu = first_cpu(mask);
304 return smp_call_function_single(cpu, func, info, wait);
305 }
306
307 if (!wait) {
308 data = kmalloc(sizeof(*data), GFP_ATOMIC);
309 if (data)
310 data->csd.flags = CSD_FLAG_ALLOC;
311 }
312 if (!data) {
313 data = &d;
314 data->csd.flags = CSD_FLAG_WAIT;
315 wait = 1;
316 }
317
318 spin_lock_init(&data->lock);
319 data->csd.func = func;
320 data->csd.info = info;
321 data->refs = num_cpus;
322 data->cpumask = mask;
323
324 spin_lock_irqsave(&call_function_lock, flags);
325 list_add_tail_rcu(&data->csd.list, &call_function_queue);
326 spin_unlock_irqrestore(&call_function_lock, flags);
327
328 /* Send a message to all CPUs in the map */
329 arch_send_call_function_ipi(mask);
330
331 /* optionally wait for the CPUs to complete */
332 if (wait)
333 csd_flag_wait(&data->csd);
334
335 return 0;
336}
337EXPORT_SYMBOL(smp_call_function_mask);
338
339/**
340 * smp_call_function(): Run a function on all other CPUs.
341 * @func: The function to run. This must be fast and non-blocking.
342 * @info: An arbitrary pointer to pass to the function.
343 * @wait: If true, wait (atomically) until function has completed on other CPUs.
344 *
345 * Returns 0 on success, else a negative status code.
346 *
347 * If @wait is true, then returns once @func has returned; otherwise
348 * it returns just before the target cpu calls @func. In case of allocation
349 * failure, @wait will be implicitly turned on.
350 *
351 * You must not call this function with disabled interrupts or from a
352 * hardware interrupt handler or from a bottom half handler.
353 */
354int smp_call_function(void (*func)(void *), void *info, int wait)
355{
356 int ret;
357
358 preempt_disable();
359 ret = smp_call_function_mask(cpu_online_map, func, info, wait);
360 preempt_enable();
361 return ret;
362}
363EXPORT_SYMBOL(smp_call_function);
364
365void ipi_call_lock(void)
366{
367 spin_lock(&call_function_lock);
368}
369
370void ipi_call_unlock(void)
371{
372 spin_unlock(&call_function_lock);
373}
374
375void ipi_call_lock_irq(void)
376{
377 spin_lock_irq(&call_function_lock);
378}
379
380void ipi_call_unlock_irq(void)
381{
382 spin_unlock_irq(&call_function_lock);
383}
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 3e9e896fdc5b..81e2fe0f983a 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -645,12 +645,12 @@ __init int spawn_ksoftirqd(void)
645/* 645/*
646 * Call a function on all processors 646 * Call a function on all processors
647 */ 647 */
648int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait) 648int on_each_cpu(void (*func) (void *info), void *info, int wait)
649{ 649{
650 int ret = 0; 650 int ret = 0;
651 651
652 preempt_disable(); 652 preempt_disable();
653 ret = smp_call_function(func, info, retry, wait); 653 ret = smp_call_function(func, info, wait);
654 local_irq_disable(); 654 local_irq_disable();
655 func(info); 655 func(info);
656 local_irq_enable(); 656 local_irq_enable();
diff --git a/kernel/stacktrace.c b/kernel/stacktrace.c
index b71816e47a30..94b527ef1d1e 100644
--- a/kernel/stacktrace.c
+++ b/kernel/stacktrace.c
@@ -6,19 +6,21 @@
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */ 7 */
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/module.h>
9#include <linux/kallsyms.h> 10#include <linux/kallsyms.h>
10#include <linux/stacktrace.h> 11#include <linux/stacktrace.h>
11 12
12void print_stack_trace(struct stack_trace *trace, int spaces) 13void print_stack_trace(struct stack_trace *trace, int spaces)
13{ 14{
14 int i, j; 15 int i;
15 16
16 for (i = 0; i < trace->nr_entries; i++) { 17 if (WARN_ON(!trace->entries))
17 unsigned long ip = trace->entries[i]; 18 return;
18 19
19 for (j = 0; j < spaces + 1; j++) 20 for (i = 0; i < trace->nr_entries; i++) {
20 printk(" "); 21 printk("%*c", 1 + spaces, ' ');
21 print_ip_sym(ip); 22 print_ip_sym(trace->entries[i]);
22 } 23 }
23} 24}
25EXPORT_SYMBOL_GPL(print_stack_trace);
24 26
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d562d6531eb..6b16e16428d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -83,6 +83,9 @@ extern int maps_protect;
83extern int sysctl_stat_interval; 83extern int sysctl_stat_interval;
84extern int latencytop_enabled; 84extern int latencytop_enabled;
85extern int sysctl_nr_open_min, sysctl_nr_open_max; 85extern int sysctl_nr_open_min, sysctl_nr_open_max;
86#ifdef CONFIG_RCU_TORTURE_TEST
87extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
86 89
87/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
88#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) 91#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
@@ -820,6 +823,16 @@ static struct ctl_table kern_table[] = {
820 .child = key_sysctls, 823 .child = key_sysctls,
821 }, 824 },
822#endif 825#endif
826#ifdef CONFIG_RCU_TORTURE_TEST
827 {
828 .ctl_name = CTL_UNNUMBERED,
829 .procname = "rcutorture_runnable",
830 .data = &rcutorture_runnable,
831 .maxlen = sizeof(int),
832 .mode = 0644,
833 .proc_handler = &proc_dointvec,
834 },
835#endif
823/* 836/*
824 * NOTE: do not add new entries to this table unless you have read 837 * NOTE: do not add new entries to this table unless you have read
825 * Documentation/sysctl/ctl_unnumbered.txt 838 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 67f80c261709..f48d0f09d32f 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -268,7 +268,7 @@ void tick_broadcast_on_off(unsigned long reason, int *oncpu)
268 "offline CPU #%d\n", *oncpu); 268 "offline CPU #%d\n", *oncpu);
269 else 269 else
270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off, 270 smp_call_function_single(*oncpu, tick_do_broadcast_on_off,
271 &reason, 1, 1); 271 &reason, 1);
272} 272}
273 273
274/* 274/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d63008b09a4c..beef7ccdf842 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -48,6 +48,13 @@ static void tick_do_update_jiffies64(ktime_t now)
48 unsigned long ticks = 0; 48 unsigned long ticks = 0;
49 ktime_t delta; 49 ktime_t delta;
50 50
51 /*
52 * Do a quick check without holding xtime_lock:
53 */
54 delta = ktime_sub(now, last_jiffies_update);
55 if (delta.tv64 < tick_period.tv64)
56 return;
57
51 /* Reevaluate with xtime_lock held */ 58 /* Reevaluate with xtime_lock held */
52 write_seqlock(&xtime_lock); 59 write_seqlock(&xtime_lock);
53 60
@@ -228,6 +235,7 @@ void tick_nohz_stop_sched_tick(void)
228 local_softirq_pending()); 235 local_softirq_pending());
229 ratelimit++; 236 ratelimit++;
230 } 237 }
238 goto end;
231 } 239 }
232 240
233 ts->idle_calls++; 241 ts->idle_calls++;