path: root/kernel
author    Linus Torvalds <torvalds@linux-foundation.org>  2011-10-26 10:26:53 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2011-10-26 10:26:53 -0400
commit    19b4a8d520a6e0176dd52aaa429261ad4fcaa545 (patch)
tree      6dcf5a780718fc50b9cd79cc803daa7c7e080a02 /kernel
parent    3cfef9524677a4ecb392d6fbffe6ebce6302f1d4 (diff)
parent    048b718029033af117870d3da47da12995be14a3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits)
  rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp()
  rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states
  rcu: Wire up RCU_BOOST_PRIO for rcutree
  rcu: Make rcu_torture_boost() exit loops at end of test
  rcu: Make rcu_torture_fqs() exit loops at end of test
  rcu: Permit rt_mutex_unlock() with irqs disabled
  rcu: Avoid having just-onlined CPU resched itself when RCU is idle
  rcu: Suppress NMI backtraces when stall ends before dump
  rcu: Prohibit grace periods during early boot
  rcu: Simplify unboosting checks
  rcu: Prevent early boot set_need_resched() from __rcu_pending()
  rcu: Dump local stack if cannot dump all CPUs' stacks
  rcu: Move __rcu_read_unlock()'s barrier() within if-statement
  rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation
  rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier
  rcu: Make rcu_implicit_dynticks_qs() locals be correct size
  rcu: Eliminate in_irq() checks in rcu_enter_nohz()
  nohz: Remove nohz_cpu_mask
  rcu: Document interpretation of RCU-lockdep splats
  rcu: Allow rcutorture's stat_interval parameter to be changed at runtime
  ...
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/lockdep.c            84
-rw-r--r--  kernel/pid.c                 4
-rw-r--r--  kernel/rcu.h                85
-rw-r--r--  kernel/rcupdate.c           26
-rw-r--r--  kernel/rcutiny.c           117
-rw-r--r--  kernel/rcutiny_plugin.h    134
-rw-r--r--  kernel/rcutorture.c         77
-rw-r--r--  kernel/rcutree.c           290
-rw-r--r--  kernel/rcutree.h            17
-rw-r--r--  kernel/rcutree_plugin.h    150
-rw-r--r--  kernel/rcutree_trace.c      13
-rw-r--r--  kernel/rtmutex.c             8
-rw-r--r--  kernel/sched.c              13
-rw-r--r--  kernel/time/tick-sched.c     6
14 files changed, 599 insertions(+), 425 deletions(-)
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c081fa967c8f..e69434b070da 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1145,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1145 if (debug_locks_silent) 1145 if (debug_locks_silent)
1146 return 0; 1146 return 0;
1147 1147
1148 printk("\n=======================================================\n"); 1148 printk("\n");
1149 printk( "[ INFO: possible circular locking dependency detected ]\n"); 1149 printk("======================================================\n");
1150 printk("[ INFO: possible circular locking dependency detected ]\n");
1150 print_kernel_version(); 1151 print_kernel_version();
1151 printk( "-------------------------------------------------------\n"); 1152 printk("-------------------------------------------------------\n");
1152 printk("%s/%d is trying to acquire lock:\n", 1153 printk("%s/%d is trying to acquire lock:\n",
1153 curr->comm, task_pid_nr(curr)); 1154 curr->comm, task_pid_nr(curr));
1154 print_lock(check_src); 1155 print_lock(check_src);
@@ -1482,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr,
1482 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1483 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1483 return 0; 1484 return 0;
1484 1485
1485 printk("\n======================================================\n"); 1486 printk("\n");
1486 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", 1487 printk("======================================================\n");
1488 printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
1487 irqclass, irqclass); 1489 irqclass, irqclass);
1488 print_kernel_version(); 1490 print_kernel_version();
1489 printk( "------------------------------------------------------\n"); 1491 printk("------------------------------------------------------\n");
1490 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", 1492 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1491 curr->comm, task_pid_nr(curr), 1493 curr->comm, task_pid_nr(curr),
1492 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, 1494 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
@@ -1711,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1711 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1713 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1712 return 0; 1714 return 0;
1713 1715
1714 printk("\n=============================================\n"); 1716 printk("\n");
1715 printk( "[ INFO: possible recursive locking detected ]\n"); 1717 printk("=============================================\n");
1718 printk("[ INFO: possible recursive locking detected ]\n");
1716 print_kernel_version(); 1719 print_kernel_version();
1717 printk( "---------------------------------------------\n"); 1720 printk("---------------------------------------------\n");
1718 printk("%s/%d is trying to acquire lock:\n", 1721 printk("%s/%d is trying to acquire lock:\n",
1719 curr->comm, task_pid_nr(curr)); 1722 curr->comm, task_pid_nr(curr));
1720 print_lock(next); 1723 print_lock(next);
@@ -2217,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2217 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2220 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2218 return 0; 2221 return 0;
2219 2222
2220 printk("\n=================================\n"); 2223 printk("\n");
2221 printk( "[ INFO: inconsistent lock state ]\n"); 2224 printk("=================================\n");
2225 printk("[ INFO: inconsistent lock state ]\n");
2222 print_kernel_version(); 2226 print_kernel_version();
2223 printk( "---------------------------------\n"); 2227 printk("---------------------------------\n");
2224 2228
2225 printk("inconsistent {%s} -> {%s} usage.\n", 2229 printk("inconsistent {%s} -> {%s} usage.\n",
2226 usage_str[prev_bit], usage_str[new_bit]); 2230 usage_str[prev_bit], usage_str[new_bit]);
@@ -2281,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr,
2281 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2285 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2282 return 0; 2286 return 0;
2283 2287
2284 printk("\n=========================================================\n"); 2288 printk("\n");
2285 printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); 2289 printk("=========================================================\n");
2290 printk("[ INFO: possible irq lock inversion dependency detected ]\n");
2286 print_kernel_version(); 2291 print_kernel_version();
2287 printk( "---------------------------------------------------------\n"); 2292 printk("---------------------------------------------------------\n");
2288 printk("%s/%d just changed the state of lock:\n", 2293 printk("%s/%d just changed the state of lock:\n",
2289 curr->comm, task_pid_nr(curr)); 2294 curr->comm, task_pid_nr(curr));
2290 print_lock(this); 2295 print_lock(this);
@@ -3161,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3161 if (debug_locks_silent) 3166 if (debug_locks_silent)
3162 return 0; 3167 return 0;
3163 3168
3164 printk("\n=====================================\n"); 3169 printk("\n");
3165 printk( "[ BUG: bad unlock balance detected! ]\n"); 3170 printk("=====================================\n");
3166 printk( "-------------------------------------\n"); 3171 printk("[ BUG: bad unlock balance detected! ]\n");
3172 printk("-------------------------------------\n");
3167 printk("%s/%d is trying to release lock (", 3173 printk("%s/%d is trying to release lock (",
3168 curr->comm, task_pid_nr(curr)); 3174 curr->comm, task_pid_nr(curr));
3169 print_lockdep_cache(lock); 3175 print_lockdep_cache(lock);
@@ -3604,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
3604 if (debug_locks_silent) 3610 if (debug_locks_silent)
3605 return 0; 3611 return 0;
3606 3612
3607 printk("\n=================================\n"); 3613 printk("\n");
3608 printk( "[ BUG: bad contention detected! ]\n"); 3614 printk("=================================\n");
3609 printk( "---------------------------------\n"); 3615 printk("[ BUG: bad contention detected! ]\n");
3616 printk("---------------------------------\n");
3610 printk("%s/%d is trying to contend lock (", 3617 printk("%s/%d is trying to contend lock (",
3611 curr->comm, task_pid_nr(curr)); 3618 curr->comm, task_pid_nr(curr));
3612 print_lockdep_cache(lock); 3619 print_lockdep_cache(lock);
@@ -3977,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3977 if (debug_locks_silent) 3984 if (debug_locks_silent)
3978 return; 3985 return;
3979 3986
3980 printk("\n=========================\n"); 3987 printk("\n");
3981 printk( "[ BUG: held lock freed! ]\n"); 3988 printk("=========================\n");
3982 printk( "-------------------------\n"); 3989 printk("[ BUG: held lock freed! ]\n");
3990 printk("-------------------------\n");
3983 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", 3991 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
3984 curr->comm, task_pid_nr(curr), mem_from, mem_to-1); 3992 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
3985 print_lock(hlock); 3993 print_lock(hlock);
@@ -4033,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr)
4033 if (debug_locks_silent) 4041 if (debug_locks_silent)
4034 return; 4042 return;
4035 4043
4036 printk("\n=====================================\n"); 4044 printk("\n");
4037 printk( "[ BUG: lock held at task exit time! ]\n"); 4045 printk("=====================================\n");
4038 printk( "-------------------------------------\n"); 4046 printk("[ BUG: lock held at task exit time! ]\n");
4047 printk("-------------------------------------\n");
4039 printk("%s/%d is exiting with locks still held!\n", 4048 printk("%s/%d is exiting with locks still held!\n",
4040 curr->comm, task_pid_nr(curr)); 4049 curr->comm, task_pid_nr(curr));
4041 lockdep_print_held_locks(curr); 4050 lockdep_print_held_locks(curr);
@@ -4129,16 +4138,17 @@ void lockdep_sys_exit(void)
4129 if (unlikely(curr->lockdep_depth)) { 4138 if (unlikely(curr->lockdep_depth)) {
4130 if (!debug_locks_off()) 4139 if (!debug_locks_off())
4131 return; 4140 return;
4132 printk("\n================================================\n"); 4141 printk("\n");
4133 printk( "[ BUG: lock held when returning to user space! ]\n"); 4142 printk("================================================\n");
4134 printk( "------------------------------------------------\n"); 4143 printk("[ BUG: lock held when returning to user space! ]\n");
4144 printk("------------------------------------------------\n");
4135 printk("%s/%d is leaving the kernel with locks still held!\n", 4145 printk("%s/%d is leaving the kernel with locks still held!\n",
4136 curr->comm, curr->pid); 4146 curr->comm, curr->pid);
4137 lockdep_print_held_locks(curr); 4147 lockdep_print_held_locks(curr);
4138 } 4148 }
4139} 4149}
4140 4150
4141void lockdep_rcu_dereference(const char *file, const int line) 4151void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4142{ 4152{
4143 struct task_struct *curr = current; 4153 struct task_struct *curr = current;
4144 4154
@@ -4147,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line)
4147 return; 4157 return;
4148#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ 4158#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
4149 /* Note: the following can be executed concurrently, so be careful. */ 4159 /* Note: the following can be executed concurrently, so be careful. */
4150 printk("\n===================================================\n"); 4160 printk("\n");
4151 printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); 4161 printk("===============================\n");
4152 printk( "---------------------------------------------------\n"); 4162 printk("[ INFO: suspicious RCU usage. ]\n");
4153 printk("%s:%d invoked rcu_dereference_check() without protection!\n", 4163 printk("-------------------------------\n");
4154 file, line); 4164 printk("%s:%d %s!\n", file, line, s);
4155 printk("\nother info that might help us debug this:\n\n"); 4165 printk("\nother info that might help us debug this:\n\n");
4156 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); 4166 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
4157 lockdep_print_held_locks(curr); 4167 lockdep_print_held_locks(curr);
4158 printk("\nstack backtrace:\n"); 4168 printk("\nstack backtrace:\n");
4159 dump_stack(); 4169 dump_stack();
4160} 4170}
4161EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); 4171EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
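The lockdep.c hunks above also rename lockdep_rcu_dereference() to the more general lockdep_rcu_suspicious(), which takes a message describing what looked suspicious instead of hard-coding the rcu_dereference_check() wording. A minimal sketch of how callers reach it, assuming the two-argument rcu_lockdep_assert() wrapper from include/linux/rcupdate.h in this series (quoted from memory, so treat the details as approximate):

/* Sketch, not verbatim: the rcupdate.h wrapper that funnels failed
 * RCU-lockdep assertions into lockdep_rcu_suspicious(). */
#define rcu_lockdep_assert(c, s)					\
	do {								\
		static bool __warned;					\
		if (debug_lockdep_rcu_enabled() && !__warned && !(c)) {	\
			__warned = true;				\
			lockdep_rcu_suspicious(__FILE__, __LINE__, s);	\
		}							\
	} while (0)

The static __warned flag keeps a broken call site from flooding the log, and the string s ends up in the "[ INFO: suspicious RCU usage. ]" report printed above.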
diff --git a/kernel/pid.c b/kernel/pid.c
index e432057f3b21..8cafe7e72ad2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task);
418 */ 418 */
419struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) 419struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
420{ 420{
421 rcu_lockdep_assert(rcu_read_lock_held()); 421 rcu_lockdep_assert(rcu_read_lock_held(),
422 "find_task_by_pid_ns() needs rcu_read_lock()"
423 " protection");
422 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); 424 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
423} 425}
424 426
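The kernel/pid.c change gives that assertion a human-readable message. An illustrative caller, not part of this patch, showing the RCU read-side protection the assertion demands:

	struct task_struct *task;

	rcu_read_lock();				/* satisfies rcu_read_lock_held() */
	task = find_task_by_pid_ns(nr, &init_pid_ns);
	if (task)
		get_task_struct(task);			/* pin the task before leaving the RCU section */
	rcu_read_unlock();

Without the rcu_read_lock()/rcu_read_unlock() pair, CONFIG_PROVE_RCU kernels now print "find_task_by_pid_ns() needs rcu_read_lock() protection" through lockdep_rcu_suspicious().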
diff --git a/kernel/rcu.h b/kernel/rcu.h
new file mode 100644
index 000000000000..f600868d550d
--- /dev/null
+++ b/kernel/rcu.h
@@ -0,0 +1,85 @@
1/*
2 * Read-Copy Update definitions shared among RCU implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2011
19 *
20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 */
22
23#ifndef __LINUX_RCU_H
24#define __LINUX_RCU_H
25
26#ifdef CONFIG_RCU_TRACE
27#define RCU_TRACE(stmt) stmt
28#else /* #ifdef CONFIG_RCU_TRACE */
29#define RCU_TRACE(stmt)
30#endif /* #else #ifdef CONFIG_RCU_TRACE */
31
32/*
33 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
34 * by call_rcu() and rcu callback execution, and are therefore not part of the
35 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
36 */
37
38#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
39# define STATE_RCU_HEAD_READY 0
40# define STATE_RCU_HEAD_QUEUED 1
41
42extern struct debug_obj_descr rcuhead_debug_descr;
43
44static inline void debug_rcu_head_queue(struct rcu_head *head)
45{
46 WARN_ON_ONCE((unsigned long)head & 0x3);
47 debug_object_activate(head, &rcuhead_debug_descr);
48 debug_object_active_state(head, &rcuhead_debug_descr,
49 STATE_RCU_HEAD_READY,
50 STATE_RCU_HEAD_QUEUED);
51}
52
53static inline void debug_rcu_head_unqueue(struct rcu_head *head)
54{
55 debug_object_active_state(head, &rcuhead_debug_descr,
56 STATE_RCU_HEAD_QUEUED,
57 STATE_RCU_HEAD_READY);
58 debug_object_deactivate(head, &rcuhead_debug_descr);
59}
60#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
61static inline void debug_rcu_head_queue(struct rcu_head *head)
62{
63}
64
65static inline void debug_rcu_head_unqueue(struct rcu_head *head)
66{
67}
68#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
69
70extern void kfree(const void *);
71
72static inline void __rcu_reclaim(char *rn, struct rcu_head *head)
73{
74 unsigned long offset = (unsigned long)head->func;
75
76 if (__is_kfree_rcu_offset(offset)) {
77 RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
78 kfree((void *)head - offset);
79 } else {
80 RCU_TRACE(trace_rcu_invoke_callback(rn, head));
81 head->func(head);
82 }
83}
84
85#endif /* __LINUX_RCU_H */
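The new kernel/rcu.h centralizes the callback-invocation helper __rcu_reclaim(), which treats a small "function pointer" as the byte offset of the rcu_head inside its enclosing structure and frees the structure directly. An illustrative producer of such offsets, assuming the kfree_rcu() macro already in mainline at this point:

struct foo {				/* hypothetical example structure */
	int data;
	struct rcu_head rcu;
};

static void foo_release(struct foo *p)
{
	/* Roughly call_rcu(&p->rcu, (void (*)(struct rcu_head *))offsetof(struct foo, rcu));
	 * __is_kfree_rcu_offset() recognizes the small value and __rcu_reclaim()
	 * turns it back into a plain kfree() of the enclosing structure. */
	kfree_rcu(p, rcu);
}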
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ddddb320be61..ca0d23b6b3e8 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -46,6 +46,11 @@
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/rcu.h>
51
52#include "rcu.h"
53
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 54#ifdef CONFIG_DEBUG_LOCK_ALLOC
50static struct lock_class_key rcu_lock_key; 55static struct lock_class_key rcu_lock_key;
51struct lockdep_map rcu_lock_map = 56struct lockdep_map rcu_lock_map =
@@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
94 99
95#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 100#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
96 101
102struct rcu_synchronize {
103 struct rcu_head head;
104 struct completion completion;
105};
106
97/* 107/*
98 * Awaken the corresponding synchronize_rcu() instance now that a 108 * Awaken the corresponding synchronize_rcu() instance now that a
99 * grace period has elapsed. 109 * grace period has elapsed.
100 */ 110 */
101void wakeme_after_rcu(struct rcu_head *head) 111static void wakeme_after_rcu(struct rcu_head *head)
102{ 112{
103 struct rcu_synchronize *rcu; 113 struct rcu_synchronize *rcu;
104 114
@@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head)
106 complete(&rcu->completion); 116 complete(&rcu->completion);
107} 117}
108 118
119void wait_rcu_gp(call_rcu_func_t crf)
120{
121 struct rcu_synchronize rcu;
122
123 init_rcu_head_on_stack(&rcu.head);
124 init_completion(&rcu.completion);
125 /* Will wake me after RCU finished. */
126 crf(&rcu.head, wakeme_after_rcu);
127 /* Wait for it. */
128 wait_for_completion(&rcu.completion);
129 destroy_rcu_head_on_stack(&rcu.head);
130}
131EXPORT_SYMBOL_GPL(wait_rcu_gp);
132
109#ifdef CONFIG_PROVE_RCU 133#ifdef CONFIG_PROVE_RCU
110/* 134/*
111 * wrapper function to avoid #include problems. 135 * wrapper function to avoid #include problems.
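wait_rcu_gp() factors out the post-a-callback-and-wait pattern that several flavors previously open-coded; the corresponding removals show up later in this diff, in rcutiny.c and rcutiny_plugin.h. A sketch of the intended use, with a hypothetical function name:

static void example_synchronize_rcu_bh(void)
{
	/* Blocks until an rcu_bh grace period has elapsed; wakeme_after_rcu()
	 * completes the on-stack rcu_synchronize set up by wait_rcu_gp(). */
	wait_rcu_gp(call_rcu_bh);
}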
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 7bbac7d0f5ab..da775c87f27f 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -37,16 +37,17 @@
37#include <linux/cpu.h> 37#include <linux/cpu.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39 39
40/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ 40#ifdef CONFIG_RCU_TRACE
41static struct task_struct *rcu_kthread_task; 41#include <trace/events/rcu.h>
42static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); 42#endif /* #else #ifdef CONFIG_RCU_TRACE */
43static unsigned long have_rcu_kthread_work; 43
44#include "rcu.h"
44 45
45/* Forward declarations for rcutiny_plugin.h. */ 46/* Forward declarations for rcutiny_plugin.h. */
46struct rcu_ctrlblk; 47struct rcu_ctrlblk;
47static void invoke_rcu_kthread(void); 48static void invoke_rcu_callbacks(void);
48static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); 49static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
49static int rcu_kthread(void *arg); 50static void rcu_process_callbacks(struct softirq_action *unused);
50static void __call_rcu(struct rcu_head *head, 51static void __call_rcu(struct rcu_head *head,
51 void (*func)(struct rcu_head *rcu), 52 void (*func)(struct rcu_head *rcu),
52 struct rcu_ctrlblk *rcp); 53 struct rcu_ctrlblk *rcp);
@@ -96,16 +97,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
96} 97}
97 98
98/* 99/*
99 * Wake up rcu_kthread() to process callbacks now eligible for invocation
100 * or to boost readers.
101 */
102static void invoke_rcu_kthread(void)
103{
104 have_rcu_kthread_work = 1;
105 wake_up(&rcu_kthread_wq);
106}
107
108/*
109 * Record an rcu quiescent state. And an rcu_bh quiescent state while we 100 * Record an rcu quiescent state. And an rcu_bh quiescent state while we
110 * are at it, given that any rcu quiescent state is also an rcu_bh 101 * are at it, given that any rcu quiescent state is also an rcu_bh
111 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 102 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
@@ -117,7 +108,7 @@ void rcu_sched_qs(int cpu)
117 local_irq_save(flags); 108 local_irq_save(flags);
118 if (rcu_qsctr_help(&rcu_sched_ctrlblk) + 109 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
119 rcu_qsctr_help(&rcu_bh_ctrlblk)) 110 rcu_qsctr_help(&rcu_bh_ctrlblk))
120 invoke_rcu_kthread(); 111 invoke_rcu_callbacks();
121 local_irq_restore(flags); 112 local_irq_restore(flags);
122} 113}
123 114
@@ -130,7 +121,7 @@ void rcu_bh_qs(int cpu)
130 121
131 local_irq_save(flags); 122 local_irq_save(flags);
132 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 123 if (rcu_qsctr_help(&rcu_bh_ctrlblk))
133 invoke_rcu_kthread(); 124 invoke_rcu_callbacks();
134 local_irq_restore(flags); 125 local_irq_restore(flags);
135} 126}
136 127
@@ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user)
154 * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure 145 * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure
155 * whose grace period has elapsed. 146 * whose grace period has elapsed.
156 */ 147 */
157static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) 148static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
158{ 149{
150 char *rn = NULL;
159 struct rcu_head *next, *list; 151 struct rcu_head *next, *list;
160 unsigned long flags; 152 unsigned long flags;
161 RCU_TRACE(int cb_count = 0); 153 RCU_TRACE(int cb_count = 0);
162 154
163 /* If no RCU callbacks ready to invoke, just return. */ 155 /* If no RCU callbacks ready to invoke, just return. */
164 if (&rcp->rcucblist == rcp->donetail) 156 if (&rcp->rcucblist == rcp->donetail) {
157 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
158 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0));
165 return; 159 return;
160 }
166 161
167 /* Move the ready-to-invoke callbacks to a local list. */ 162 /* Move the ready-to-invoke callbacks to a local list. */
168 local_irq_save(flags); 163 local_irq_save(flags);
164 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
169 list = rcp->rcucblist; 165 list = rcp->rcucblist;
170 rcp->rcucblist = *rcp->donetail; 166 rcp->rcucblist = *rcp->donetail;
171 *rcp->donetail = NULL; 167 *rcp->donetail = NULL;
@@ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
176 local_irq_restore(flags); 172 local_irq_restore(flags);
177 173
178 /* Invoke the callbacks on the local list. */ 174 /* Invoke the callbacks on the local list. */
175 RCU_TRACE(rn = rcp->name);
179 while (list) { 176 while (list) {
180 next = list->next; 177 next = list->next;
181 prefetch(next); 178 prefetch(next);
182 debug_rcu_head_unqueue(list); 179 debug_rcu_head_unqueue(list);
183 local_bh_disable(); 180 local_bh_disable();
184 __rcu_reclaim(list); 181 __rcu_reclaim(rn, list);
185 local_bh_enable(); 182 local_bh_enable();
186 list = next; 183 list = next;
187 RCU_TRACE(cb_count++); 184 RCU_TRACE(cb_count++);
188 } 185 }
189 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); 186 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
187 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count));
190} 188}
191 189
192/* 190static void rcu_process_callbacks(struct softirq_action *unused)
193 * This kthread invokes RCU callbacks whose grace periods have
194 * elapsed. It is awakened as needed, and takes the place of the
195 * RCU_SOFTIRQ that was used previously for this purpose.
196 * This is a kthread, but it is never stopped, at least not until
197 * the system goes down.
198 */
199static int rcu_kthread(void *arg)
200{ 191{
201 unsigned long work; 192 __rcu_process_callbacks(&rcu_sched_ctrlblk);
202 unsigned long morework; 193 __rcu_process_callbacks(&rcu_bh_ctrlblk);
203 unsigned long flags; 194 rcu_preempt_process_callbacks();
204
205 for (;;) {
206 wait_event_interruptible(rcu_kthread_wq,
207 have_rcu_kthread_work != 0);
208 morework = rcu_boost();
209 local_irq_save(flags);
210 work = have_rcu_kthread_work;
211 have_rcu_kthread_work = morework;
212 local_irq_restore(flags);
213 if (work) {
214 rcu_process_callbacks(&rcu_sched_ctrlblk);
215 rcu_process_callbacks(&rcu_bh_ctrlblk);
216 rcu_preempt_process_callbacks();
217 }
218 schedule_timeout_interruptible(1); /* Leave CPU for others. */
219 }
220
221 return 0; /* Not reached, but needed to shut gcc up. */
222} 195}
223 196
224/* 197/*
@@ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
280 __call_rcu(head, func, &rcu_bh_ctrlblk); 253 __call_rcu(head, func, &rcu_bh_ctrlblk);
281} 254}
282EXPORT_SYMBOL_GPL(call_rcu_bh); 255EXPORT_SYMBOL_GPL(call_rcu_bh);
283
284void rcu_barrier_bh(void)
285{
286 struct rcu_synchronize rcu;
287
288 init_rcu_head_on_stack(&rcu.head);
289 init_completion(&rcu.completion);
290 /* Will wake me after RCU finished. */
291 call_rcu_bh(&rcu.head, wakeme_after_rcu);
292 /* Wait for it. */
293 wait_for_completion(&rcu.completion);
294 destroy_rcu_head_on_stack(&rcu.head);
295}
296EXPORT_SYMBOL_GPL(rcu_barrier_bh);
297
298void rcu_barrier_sched(void)
299{
300 struct rcu_synchronize rcu;
301
302 init_rcu_head_on_stack(&rcu.head);
303 init_completion(&rcu.completion);
304 /* Will wake me after RCU finished. */
305 call_rcu_sched(&rcu.head, wakeme_after_rcu);
306 /* Wait for it. */
307 wait_for_completion(&rcu.completion);
308 destroy_rcu_head_on_stack(&rcu.head);
309}
310EXPORT_SYMBOL_GPL(rcu_barrier_sched);
311
312/*
313 * Spawn the kthread that invokes RCU callbacks.
314 */
315static int __init rcu_spawn_kthreads(void)
316{
317 struct sched_param sp;
318
319 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
320 sp.sched_priority = RCU_BOOST_PRIO;
321 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
322 return 0;
323}
324early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index f259c676195f..02aa7139861c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -26,29 +26,26 @@
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28 28
29#ifdef CONFIG_RCU_TRACE
30#define RCU_TRACE(stmt) stmt
31#else /* #ifdef CONFIG_RCU_TRACE */
32#define RCU_TRACE(stmt)
33#endif /* #else #ifdef CONFIG_RCU_TRACE */
34
35/* Global control variables for rcupdate callback mechanism. */ 29/* Global control variables for rcupdate callback mechanism. */
36struct rcu_ctrlblk { 30struct rcu_ctrlblk {
37 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ 31 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
38 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ 32 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
39 struct rcu_head **curtail; /* ->next pointer of last CB. */ 33 struct rcu_head **curtail; /* ->next pointer of last CB. */
40 RCU_TRACE(long qlen); /* Number of pending CBs. */ 34 RCU_TRACE(long qlen); /* Number of pending CBs. */
35 RCU_TRACE(char *name); /* Name of RCU type. */
41}; 36};
42 37
43/* Definition for rcupdate control block. */ 38/* Definition for rcupdate control block. */
44static struct rcu_ctrlblk rcu_sched_ctrlblk = { 39static struct rcu_ctrlblk rcu_sched_ctrlblk = {
45 .donetail = &rcu_sched_ctrlblk.rcucblist, 40 .donetail = &rcu_sched_ctrlblk.rcucblist,
46 .curtail = &rcu_sched_ctrlblk.rcucblist, 41 .curtail = &rcu_sched_ctrlblk.rcucblist,
42 RCU_TRACE(.name = "rcu_sched")
47}; 43};
48 44
49static struct rcu_ctrlblk rcu_bh_ctrlblk = { 45static struct rcu_ctrlblk rcu_bh_ctrlblk = {
50 .donetail = &rcu_bh_ctrlblk.rcucblist, 46 .donetail = &rcu_bh_ctrlblk.rcucblist,
51 .curtail = &rcu_bh_ctrlblk.rcucblist, 47 .curtail = &rcu_bh_ctrlblk.rcucblist,
48 RCU_TRACE(.name = "rcu_bh")
52}; 49};
53 50
54#ifdef CONFIG_DEBUG_LOCK_ALLOC 51#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
131 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, 128 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
132 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, 129 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
133 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), 130 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
131 RCU_TRACE(.rcb.name = "rcu_preempt")
134}; 132};
135 133
136static int rcu_preempted_readers_exp(void); 134static int rcu_preempted_readers_exp(void);
@@ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m)
247 245
248#include "rtmutex_common.h" 246#include "rtmutex_common.h"
249 247
248#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
249
250/* Controls for rcu_kthread() kthread. */
251static struct task_struct *rcu_kthread_task;
252static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
253static unsigned long have_rcu_kthread_work;
254
250/* 255/*
251 * Carry out RCU priority boosting on the task indicated by ->boost_tasks, 256 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
252 * and advance ->boost_tasks to the next task in the ->blkd_tasks list. 257 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
@@ -334,7 +339,7 @@ static int rcu_initiate_boost(void)
334 if (rcu_preempt_ctrlblk.exp_tasks == NULL) 339 if (rcu_preempt_ctrlblk.exp_tasks == NULL)
335 rcu_preempt_ctrlblk.boost_tasks = 340 rcu_preempt_ctrlblk.boost_tasks =
336 rcu_preempt_ctrlblk.gp_tasks; 341 rcu_preempt_ctrlblk.gp_tasks;
337 invoke_rcu_kthread(); 342 invoke_rcu_callbacks();
338 } else 343 } else
339 RCU_TRACE(rcu_initiate_boost_trace()); 344 RCU_TRACE(rcu_initiate_boost_trace());
340 return 1; 345 return 1;
@@ -353,14 +358,6 @@ static void rcu_preempt_boost_start_gp(void)
353#else /* #ifdef CONFIG_RCU_BOOST */ 358#else /* #ifdef CONFIG_RCU_BOOST */
354 359
355/* 360/*
356 * If there is no RCU priority boosting, we don't boost.
357 */
358static int rcu_boost(void)
359{
360 return 0;
361}
362
363/*
364 * If there is no RCU priority boosting, we don't initiate boosting, 361 * If there is no RCU priority boosting, we don't initiate boosting,
365 * but we do indicate whether there are blocked readers blocking the 362 * but we do indicate whether there are blocked readers blocking the
366 * current grace period. 363 * current grace period.
@@ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void)
427 424
428 /* If there are done callbacks, cause them to be invoked. */ 425 /* If there are done callbacks, cause them to be invoked. */
429 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) 426 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
430 invoke_rcu_kthread(); 427 invoke_rcu_callbacks();
431} 428}
432 429
433/* 430/*
@@ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void)
648 rcu_preempt_cpu_qs(); 645 rcu_preempt_cpu_qs();
649 if (&rcu_preempt_ctrlblk.rcb.rcucblist != 646 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
650 rcu_preempt_ctrlblk.rcb.donetail) 647 rcu_preempt_ctrlblk.rcb.donetail)
651 invoke_rcu_kthread(); 648 invoke_rcu_callbacks();
652 if (rcu_preempt_gp_in_progress() && 649 if (rcu_preempt_gp_in_progress() &&
653 rcu_cpu_blocking_cur_gp() && 650 rcu_cpu_blocking_cur_gp() &&
654 rcu_preempt_running_reader()) 651 rcu_preempt_running_reader())
@@ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
674 */ 671 */
675static void rcu_preempt_process_callbacks(void) 672static void rcu_preempt_process_callbacks(void)
676{ 673{
677 rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); 674 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
678} 675}
679 676
680/* 677/*
@@ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
697} 694}
698EXPORT_SYMBOL_GPL(call_rcu); 695EXPORT_SYMBOL_GPL(call_rcu);
699 696
700void rcu_barrier(void)
701{
702 struct rcu_synchronize rcu;
703
704 init_rcu_head_on_stack(&rcu.head);
705 init_completion(&rcu.completion);
706 /* Will wake me after RCU finished. */
707 call_rcu(&rcu.head, wakeme_after_rcu);
708 /* Wait for it. */
709 wait_for_completion(&rcu.completion);
710 destroy_rcu_head_on_stack(&rcu.head);
711}
712EXPORT_SYMBOL_GPL(rcu_barrier);
713
714/* 697/*
715 * synchronize_rcu - wait until a grace period has elapsed. 698 * synchronize_rcu - wait until a grace period has elapsed.
716 * 699 *
@@ -864,15 +847,6 @@ static void show_tiny_preempt_stats(struct seq_file *m)
864#endif /* #ifdef CONFIG_RCU_TRACE */ 847#endif /* #ifdef CONFIG_RCU_TRACE */
865 848
866/* 849/*
867 * Because preemptible RCU does not exist, it is never necessary to
868 * boost preempted RCU readers.
869 */
870static int rcu_boost(void)
871{
872 return 0;
873}
874
875/*
876 * Because preemptible RCU does not exist, it never has any callbacks 850 * Because preemptible RCU does not exist, it never has any callbacks
877 * to check. 851 * to check.
878 */ 852 */
@@ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void)
898 872
899#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ 873#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
900 874
875#ifdef CONFIG_RCU_BOOST
876
877/*
878 * Wake up rcu_kthread() to process callbacks now eligible for invocation
879 * or to boost readers.
880 */
881static void invoke_rcu_callbacks(void)
882{
883 have_rcu_kthread_work = 1;
884 wake_up(&rcu_kthread_wq);
885}
886
887/*
888 * This kthread invokes RCU callbacks whose grace periods have
889 * elapsed. It is awakened as needed, and takes the place of the
890 * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
891 * This is a kthread, but it is never stopped, at least not until
892 * the system goes down.
893 */
894static int rcu_kthread(void *arg)
895{
896 unsigned long work;
897 unsigned long morework;
898 unsigned long flags;
899
900 for (;;) {
901 wait_event_interruptible(rcu_kthread_wq,
902 have_rcu_kthread_work != 0);
903 morework = rcu_boost();
904 local_irq_save(flags);
905 work = have_rcu_kthread_work;
906 have_rcu_kthread_work = morework;
907 local_irq_restore(flags);
908 if (work)
909 rcu_process_callbacks(NULL);
910 schedule_timeout_interruptible(1); /* Leave CPU for others. */
911 }
912
913 return 0; /* Not reached, but needed to shut gcc up. */
914}
915
916/*
917 * Spawn the kthread that invokes RCU callbacks.
918 */
919static int __init rcu_spawn_kthreads(void)
920{
921 struct sched_param sp;
922
923 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
924 sp.sched_priority = RCU_BOOST_PRIO;
925 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
926 return 0;
927}
928early_initcall(rcu_spawn_kthreads);
929
930#else /* #ifdef CONFIG_RCU_BOOST */
931
932/*
933 * Start up softirq processing of callbacks.
934 */
935void invoke_rcu_callbacks(void)
936{
937 raise_softirq(RCU_SOFTIRQ);
938}
939
940void rcu_init(void)
941{
942 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
943}
944
945#endif /* #else #ifdef CONFIG_RCU_BOOST */
946
901#ifdef CONFIG_DEBUG_LOCK_ALLOC 947#ifdef CONFIG_DEBUG_LOCK_ALLOC
902#include <linux/kernel_stat.h> 948#include <linux/kernel_stat.h>
903 949
@@ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void)
913 959
914#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 960#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
915 961
916#ifdef CONFIG_RCU_BOOST
917#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
918#else /* #ifdef CONFIG_RCU_BOOST */
919#define RCU_BOOST_PRIO 1
920#endif /* #else #ifdef CONFIG_RCU_BOOST */
921
922#ifdef CONFIG_RCU_TRACE 962#ifdef CONFIG_RCU_TRACE
923 963
924#ifdef CONFIG_RCU_BOOST 964#ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 98f51b13bb7e..764825c2685c 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -73,7 +73,7 @@ module_param(nreaders, int, 0444);
73MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 73MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
74module_param(nfakewriters, int, 0444); 74module_param(nfakewriters, int, 0444);
75MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); 75MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
76module_param(stat_interval, int, 0444); 76module_param(stat_interval, int, 0644);
77MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); 77MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
78module_param(verbose, bool, 0444); 78module_param(verbose, bool, 0444);
79MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); 79MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
@@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
480 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); 480 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
481} 481}
482 482
483struct rcu_bh_torture_synchronize {
484 struct rcu_head head;
485 struct completion completion;
486};
487
488static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
489{
490 struct rcu_bh_torture_synchronize *rcu;
491
492 rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
493 complete(&rcu->completion);
494}
495
496static void rcu_bh_torture_synchronize(void)
497{
498 struct rcu_bh_torture_synchronize rcu;
499
500 init_rcu_head_on_stack(&rcu.head);
501 init_completion(&rcu.completion);
502 call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
503 wait_for_completion(&rcu.completion);
504 destroy_rcu_head_on_stack(&rcu.head);
505}
506
507static struct rcu_torture_ops rcu_bh_ops = { 483static struct rcu_torture_ops rcu_bh_ops = {
508 .init = NULL, 484 .init = NULL,
509 .cleanup = NULL, 485 .cleanup = NULL,
@@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
512 .readunlock = rcu_bh_torture_read_unlock, 488 .readunlock = rcu_bh_torture_read_unlock,
513 .completed = rcu_bh_torture_completed, 489 .completed = rcu_bh_torture_completed,
514 .deferred_free = rcu_bh_torture_deferred_free, 490 .deferred_free = rcu_bh_torture_deferred_free,
515 .sync = rcu_bh_torture_synchronize, 491 .sync = synchronize_rcu_bh,
516 .cb_barrier = rcu_barrier_bh, 492 .cb_barrier = rcu_barrier_bh,
517 .fqs = rcu_bh_force_quiescent_state, 493 .fqs = rcu_bh_force_quiescent_state,
518 .stats = NULL, 494 .stats = NULL,
@@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
528 .readunlock = rcu_bh_torture_read_unlock, 504 .readunlock = rcu_bh_torture_read_unlock,
529 .completed = rcu_bh_torture_completed, 505 .completed = rcu_bh_torture_completed,
530 .deferred_free = rcu_sync_torture_deferred_free, 506 .deferred_free = rcu_sync_torture_deferred_free,
531 .sync = rcu_bh_torture_synchronize, 507 .sync = synchronize_rcu_bh,
532 .cb_barrier = NULL, 508 .cb_barrier = NULL,
533 .fqs = rcu_bh_force_quiescent_state, 509 .fqs = rcu_bh_force_quiescent_state,
534 .stats = NULL, 510 .stats = NULL,
@@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
536 .name = "rcu_bh_sync" 512 .name = "rcu_bh_sync"
537}; 513};
538 514
515static struct rcu_torture_ops rcu_bh_expedited_ops = {
516 .init = rcu_sync_torture_init,
517 .cleanup = NULL,
518 .readlock = rcu_bh_torture_read_lock,
519 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
520 .readunlock = rcu_bh_torture_read_unlock,
521 .completed = rcu_bh_torture_completed,
522 .deferred_free = rcu_sync_torture_deferred_free,
523 .sync = synchronize_rcu_bh_expedited,
524 .cb_barrier = NULL,
525 .fqs = rcu_bh_force_quiescent_state,
526 .stats = NULL,
527 .irq_capable = 1,
528 .name = "rcu_bh_expedited"
529};
530
539/* 531/*
540 * Definitions for srcu torture testing. 532 * Definitions for srcu torture testing.
541 */ 533 */
@@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
659 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); 651 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
660} 652}
661 653
662static void sched_torture_synchronize(void)
663{
664 synchronize_sched();
665}
666
667static struct rcu_torture_ops sched_ops = { 654static struct rcu_torture_ops sched_ops = {
668 .init = rcu_sync_torture_init, 655 .init = rcu_sync_torture_init,
669 .cleanup = NULL, 656 .cleanup = NULL,
@@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = {
672 .readunlock = sched_torture_read_unlock, 659 .readunlock = sched_torture_read_unlock,
673 .completed = rcu_no_completed, 660 .completed = rcu_no_completed,
674 .deferred_free = rcu_sched_torture_deferred_free, 661 .deferred_free = rcu_sched_torture_deferred_free,
675 .sync = sched_torture_synchronize, 662 .sync = synchronize_sched,
676 .cb_barrier = rcu_barrier_sched, 663 .cb_barrier = rcu_barrier_sched,
677 .fqs = rcu_sched_force_quiescent_state, 664 .fqs = rcu_sched_force_quiescent_state,
678 .stats = NULL, 665 .stats = NULL,
@@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = {
688 .readunlock = sched_torture_read_unlock, 675 .readunlock = sched_torture_read_unlock,
689 .completed = rcu_no_completed, 676 .completed = rcu_no_completed,
690 .deferred_free = rcu_sync_torture_deferred_free, 677 .deferred_free = rcu_sync_torture_deferred_free,
691 .sync = sched_torture_synchronize, 678 .sync = synchronize_sched,
692 .cb_barrier = NULL, 679 .cb_barrier = NULL,
693 .fqs = rcu_sched_force_quiescent_state, 680 .fqs = rcu_sched_force_quiescent_state,
694 .stats = NULL, 681 .stats = NULL,
@@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg)
754 do { 741 do {
755 /* Wait for the next test interval. */ 742 /* Wait for the next test interval. */
756 oldstarttime = boost_starttime; 743 oldstarttime = boost_starttime;
757 while (jiffies - oldstarttime > ULONG_MAX / 2) { 744 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
758 schedule_timeout_uninterruptible(1); 745 schedule_timeout_uninterruptible(1);
759 rcu_stutter_wait("rcu_torture_boost"); 746 rcu_stutter_wait("rcu_torture_boost");
760 if (kthread_should_stop() || 747 if (kthread_should_stop() ||
@@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg)
765 /* Do one boost-test interval. */ 752 /* Do one boost-test interval. */
766 endtime = oldstarttime + test_boost_duration * HZ; 753 endtime = oldstarttime + test_boost_duration * HZ;
767 call_rcu_time = jiffies; 754 call_rcu_time = jiffies;
768 while (jiffies - endtime > ULONG_MAX / 2) { 755 while (ULONG_CMP_LT(jiffies, endtime)) {
769 /* If we don't have a callback in flight, post one. */ 756 /* If we don't have a callback in flight, post one. */
770 if (!rbi.inflight) { 757 if (!rbi.inflight) {
771 smp_mb(); /* RCU core before ->inflight = 1. */ 758 smp_mb(); /* RCU core before ->inflight = 1. */
@@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg)
792 * interval. Besides, we are running at RT priority, 779 * interval. Besides, we are running at RT priority,
793 * so delays should be relatively rare. 780 * so delays should be relatively rare.
794 */ 781 */
795 while (oldstarttime == boost_starttime) { 782 while (oldstarttime == boost_starttime &&
783 !kthread_should_stop()) {
796 if (mutex_trylock(&boost_mutex)) { 784 if (mutex_trylock(&boost_mutex)) {
797 boost_starttime = jiffies + 785 boost_starttime = jiffies +
798 test_boost_interval * HZ; 786 test_boost_interval * HZ;
@@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
809 797
810 /* Clean up and exit. */ 798 /* Clean up and exit. */
811 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); 799 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
812 destroy_rcu_head_on_stack(&rbi.rcu);
813 rcutorture_shutdown_absorb("rcu_torture_boost"); 800 rcutorture_shutdown_absorb("rcu_torture_boost");
814 while (!kthread_should_stop() || rbi.inflight) 801 while (!kthread_should_stop() || rbi.inflight)
815 schedule_timeout_uninterruptible(1); 802 schedule_timeout_uninterruptible(1);
816 smp_mb(); /* order accesses to ->inflight before stack-frame death. */ 803 smp_mb(); /* order accesses to ->inflight before stack-frame death. */
804 destroy_rcu_head_on_stack(&rbi.rcu);
817 return 0; 805 return 0;
818} 806}
819 807
@@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg)
831 VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); 819 VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
832 do { 820 do {
833 fqs_resume_time = jiffies + fqs_stutter * HZ; 821 fqs_resume_time = jiffies + fqs_stutter * HZ;
834 while (jiffies - fqs_resume_time > LONG_MAX) { 822 while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
823 !kthread_should_stop()) {
835 schedule_timeout_interruptible(1); 824 schedule_timeout_interruptible(1);
836 } 825 }
837 fqs_burst_remaining = fqs_duration; 826 fqs_burst_remaining = fqs_duration;
838 while (fqs_burst_remaining > 0) { 827 while (fqs_burst_remaining > 0 &&
828 !kthread_should_stop()) {
839 cur_ops->fqs(); 829 cur_ops->fqs();
840 udelay(fqs_holdoff); 830 udelay(fqs_holdoff);
841 fqs_burst_remaining -= fqs_holdoff; 831 fqs_burst_remaining -= fqs_holdoff;
@@ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu)
1280 /* Don't allow time recalculation while creating a new task. */ 1270 /* Don't allow time recalculation while creating a new task. */
1281 mutex_lock(&boost_mutex); 1271 mutex_lock(&boost_mutex);
1282 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); 1272 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
1283 boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, 1273 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
1284 "rcu_torture_boost"); 1274 cpu_to_node(cpu),
1275 "rcu_torture_boost");
1285 if (IS_ERR(boost_tasks[cpu])) { 1276 if (IS_ERR(boost_tasks[cpu])) {
1286 retval = PTR_ERR(boost_tasks[cpu]); 1277 retval = PTR_ERR(boost_tasks[cpu]);
1287 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); 1278 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
@@ -1424,7 +1415,7 @@ rcu_torture_init(void)
1424 int firsterr = 0; 1415 int firsterr = 0;
1425 static struct rcu_torture_ops *torture_ops[] = 1416 static struct rcu_torture_ops *torture_ops[] =
1426 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1417 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1427 &rcu_bh_ops, &rcu_bh_sync_ops, 1418 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1428 &srcu_ops, &srcu_expedited_ops, 1419 &srcu_ops, &srcu_expedited_ops,
1429 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1420 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1430 1421
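Several rcutorture loops above stop testing jiffies with hand-rolled "difference greater than ULONG_MAX / 2" expressions and use ULONG_CMP_LT() instead, and they now also bail out when kthread_should_stop() is set so the test can shut down cleanly. For reference, the wraparound-safe comparisons as defined in include/linux/rcupdate.h of this era (quoted from memory, so approximate):

#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))	/* true when a is at or after b, modulo wrap */
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 <  (a) - (b))	/* true when a is before b, modulo wrap */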
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ba06207b1dd3..e234eb92a177 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -52,13 +52,16 @@
52#include <linux/prefetch.h> 52#include <linux/prefetch.h>
53 53
54#include "rcutree.h" 54#include "rcutree.h"
55#include <trace/events/rcu.h>
56
57#include "rcu.h"
55 58
56/* Data structures. */ 59/* Data structures. */
57 60
58static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 61static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
59 62
60#define RCU_STATE_INITIALIZER(structname) { \ 63#define RCU_STATE_INITIALIZER(structname) { \
61 .level = { &structname.node[0] }, \ 64 .level = { &structname##_state.node[0] }, \
62 .levelcnt = { \ 65 .levelcnt = { \
63 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 66 NUM_RCU_LVL_0, /* root of hierarchy. */ \
64 NUM_RCU_LVL_1, \ 67 NUM_RCU_LVL_1, \
@@ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
69 .signaled = RCU_GP_IDLE, \ 72 .signaled = RCU_GP_IDLE, \
70 .gpnum = -300, \ 73 .gpnum = -300, \
71 .completed = -300, \ 74 .completed = -300, \
72 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ 75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
73 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ 76 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
74 .n_force_qs = 0, \ 77 .n_force_qs = 0, \
75 .n_force_qs_ngp = 0, \ 78 .n_force_qs_ngp = 0, \
76 .name = #structname, \ 79 .name = #structname, \
77} 80}
78 81
79struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); 82struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
80DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 83DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
81 84
82struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 85struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
83DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 86DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
84 87
85static struct rcu_state *rcu_state; 88static struct rcu_state *rcu_state;
@@ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
128static void invoke_rcu_core(void); 131static void invoke_rcu_core(void);
129static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 132static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
130 133
131#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
132
133/* 134/*
134 * Track the rcutorture test sequence number and the update version 135 * Track the rcutorture test sequence number and the update version
135 * number within a given test. The rcutorture_testseq is incremented 136 * number within a given test. The rcutorture_testseq is incremented
@@ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
156 * Note a quiescent state. Because we do not need to know 157 * Note a quiescent state. Because we do not need to know
157 * how many quiescent states passed, just if there was at least 158 * how many quiescent states passed, just if there was at least
158 * one since the start of the grace period, this just sets a flag. 159 * one since the start of the grace period, this just sets a flag.
160 * The caller must have disabled preemption.
159 */ 161 */
160void rcu_sched_qs(int cpu) 162void rcu_sched_qs(int cpu)
161{ 163{
162 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 164 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
163 165
164 rdp->passed_quiesc_completed = rdp->gpnum - 1; 166 rdp->passed_quiesce_gpnum = rdp->gpnum;
165 barrier(); 167 barrier();
166 rdp->passed_quiesc = 1; 168 if (rdp->passed_quiesce == 0)
169 trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
170 rdp->passed_quiesce = 1;
167} 171}
168 172
169void rcu_bh_qs(int cpu) 173void rcu_bh_qs(int cpu)
170{ 174{
171 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 175 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
172 176
173 rdp->passed_quiesc_completed = rdp->gpnum - 1; 177 rdp->passed_quiesce_gpnum = rdp->gpnum;
174 barrier(); 178 barrier();
175 rdp->passed_quiesc = 1; 179 if (rdp->passed_quiesce == 0)
180 trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
181 rdp->passed_quiesce = 1;
176} 182}
177 183
178/* 184/*
179 * Note a context switch. This is a quiescent state for RCU-sched, 185 * Note a context switch. This is a quiescent state for RCU-sched,
180 * and requires special handling for preemptible RCU. 186 * and requires special handling for preemptible RCU.
187 * The caller must have disabled preemption.
181 */ 188 */
182void rcu_note_context_switch(int cpu) 189void rcu_note_context_switch(int cpu)
183{ 190{
191 trace_rcu_utilization("Start context switch");
184 rcu_sched_qs(cpu); 192 rcu_sched_qs(cpu);
185 rcu_preempt_note_context_switch(cpu); 193 rcu_preempt_note_context_switch(cpu);
194 trace_rcu_utilization("End context switch");
186} 195}
187EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196EXPORT_SYMBOL_GPL(rcu_note_context_switch);
188 197
@@ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
193}; 202};
194#endif /* #ifdef CONFIG_NO_HZ */ 203#endif /* #ifdef CONFIG_NO_HZ */
195 204
196static int blimit = 10; /* Maximum callbacks per softirq. */ 205static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
197static int qhimark = 10000; /* If this many pending, ignore blimit. */ 206static int qhimark = 10000; /* If this many pending, ignore blimit. */
198static int qlowmark = 100; /* Once only this many pending, use blimit. */ 207static int qlowmark = 100; /* Once only this many pending, use blimit. */
199 208
@@ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
314 * trust its state not to change because interrupts are disabled. 323 * trust its state not to change because interrupts are disabled.
315 */ 324 */
316 if (cpu_is_offline(rdp->cpu)) { 325 if (cpu_is_offline(rdp->cpu)) {
326 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
317 rdp->offline_fqs++; 327 rdp->offline_fqs++;
318 return 1; 328 return 1;
319 } 329 }
@@ -354,19 +364,13 @@ void rcu_enter_nohz(void)
354 local_irq_restore(flags); 364 local_irq_restore(flags);
355 return; 365 return;
356 } 366 }
367 trace_rcu_dyntick("Start");
357 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 368 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
358 smp_mb__before_atomic_inc(); /* See above. */ 369 smp_mb__before_atomic_inc(); /* See above. */
359 atomic_inc(&rdtp->dynticks); 370 atomic_inc(&rdtp->dynticks);
360 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 371 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
361 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 372 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
362 local_irq_restore(flags); 373 local_irq_restore(flags);
363
364 /* If the interrupt queued a callback, get out of dyntick mode. */
365 if (in_irq() &&
366 (__get_cpu_var(rcu_sched_data).nxtlist ||
367 __get_cpu_var(rcu_bh_data).nxtlist ||
368 rcu_preempt_needs_cpu(smp_processor_id())))
369 set_need_resched();
370} 374}
371 375
372/* 376/*
@@ -391,6 +395,7 @@ void rcu_exit_nohz(void)
391 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 395 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
392 smp_mb__after_atomic_inc(); /* See above. */ 396 smp_mb__after_atomic_inc(); /* See above. */
393 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 397 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
398 trace_rcu_dyntick("End");
394 local_irq_restore(flags); 399 local_irq_restore(flags);
395} 400}
396 401
@@ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
481 */ 486 */
482static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 487static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
483{ 488{
484 unsigned long curr; 489 unsigned int curr;
485 unsigned long snap; 490 unsigned int snap;
486 491
487 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); 492 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
488 snap = (unsigned long)rdp->dynticks_snap; 493 snap = (unsigned int)rdp->dynticks_snap;
489 494
490 /* 495 /*
491 * If the CPU passed through or entered a dynticks idle phase with 496 * If the CPU passed through or entered a dynticks idle phase with
@@ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
495 * read-side critical section that started before the beginning 500 * read-side critical section that started before the beginning
496 * of the current RCU grace period. 501 * of the current RCU grace period.
497 */ 502 */
498 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { 503 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
504 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
499 rdp->dynticks_fqs++; 505 rdp->dynticks_fqs++;
500 return 1; 506 return 1;
501 } 507 }
@@ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
537 int cpu; 543 int cpu;
538 long delta; 544 long delta;
539 unsigned long flags; 545 unsigned long flags;
546 int ndetected;
540 struct rcu_node *rnp = rcu_get_root(rsp); 547 struct rcu_node *rnp = rcu_get_root(rsp);
541 548
542 /* Only let one CPU complain about others per time interval. */ 549 /* Only let one CPU complain about others per time interval. */
@@ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
553 * Now rat on any tasks that got kicked up to the root rcu_node 560 * Now rat on any tasks that got kicked up to the root rcu_node
554 * due to CPU offlining. 561 * due to CPU offlining.
555 */ 562 */
556 rcu_print_task_stall(rnp); 563 ndetected = rcu_print_task_stall(rnp);
557 raw_spin_unlock_irqrestore(&rnp->lock, flags); 564 raw_spin_unlock_irqrestore(&rnp->lock, flags);
558 565
559 /* 566 /*
@@ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
565 rsp->name); 572 rsp->name);
566 rcu_for_each_leaf_node(rsp, rnp) { 573 rcu_for_each_leaf_node(rsp, rnp) {
567 raw_spin_lock_irqsave(&rnp->lock, flags); 574 raw_spin_lock_irqsave(&rnp->lock, flags);
568 rcu_print_task_stall(rnp); 575 ndetected += rcu_print_task_stall(rnp);
569 raw_spin_unlock_irqrestore(&rnp->lock, flags); 576 raw_spin_unlock_irqrestore(&rnp->lock, flags);
570 if (rnp->qsmask == 0) 577 if (rnp->qsmask == 0)
571 continue; 578 continue;
572 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 579 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
573 if (rnp->qsmask & (1UL << cpu)) 580 if (rnp->qsmask & (1UL << cpu)) {
574 printk(" %d", rnp->grplo + cpu); 581 printk(" %d", rnp->grplo + cpu);
582 ndetected++;
583 }
575 } 584 }
576 printk("} (detected by %d, t=%ld jiffies)\n", 585 printk("} (detected by %d, t=%ld jiffies)\n",
577 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 586 smp_processor_id(), (long)(jiffies - rsp->gp_start));
578 trigger_all_cpu_backtrace(); 587 if (ndetected == 0)
588 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
589 else if (!trigger_all_cpu_backtrace())
590 dump_stack();
579 591
580 /* If so configured, complain about tasks blocking the grace period. */ 592 /* If so configured, complain about tasks blocking the grace period. */
581 593
@@ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
596 */ 608 */
597 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 609 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
598 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 610 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
599 trigger_all_cpu_backtrace(); 611 if (!trigger_all_cpu_backtrace())
612 dump_stack();
600 613
601 raw_spin_lock_irqsave(&rnp->lock, flags); 614 raw_spin_lock_irqsave(&rnp->lock, flags);
602 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 615 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
@@ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
678 * go looking for one. 691 * go looking for one.
679 */ 692 */
680 rdp->gpnum = rnp->gpnum; 693 rdp->gpnum = rnp->gpnum;
694 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
681 if (rnp->qsmask & rdp->grpmask) { 695 if (rnp->qsmask & rdp->grpmask) {
682 rdp->qs_pending = 1; 696 rdp->qs_pending = 1;
683 rdp->passed_quiesc = 0; 697 rdp->passed_quiesce = 0;
684 } else 698 } else
685 rdp->qs_pending = 0; 699 rdp->qs_pending = 0;
686 } 700 }
@@ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
741 755
742 /* Remember that we saw this grace-period completion. */ 756 /* Remember that we saw this grace-period completion. */
743 rdp->completed = rnp->completed; 757 rdp->completed = rnp->completed;
758 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
744 759
745 /* 760 /*
746 * If we were in an extended quiescent state, we may have 761 * If we were in an extended quiescent state, we may have
@@ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
826 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 841 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
827 struct rcu_node *rnp = rcu_get_root(rsp); 842 struct rcu_node *rnp = rcu_get_root(rsp);
828 843
829 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { 844 if (!rcu_scheduler_fully_active ||
830 if (cpu_needs_another_gp(rsp, rdp)) 845 !cpu_needs_another_gp(rsp, rdp)) {
831 rsp->fqs_need_gp = 1; 846 /*
832 if (rnp->completed == rsp->completed) { 847 * Either the scheduler hasn't yet spawned the first
833 raw_spin_unlock_irqrestore(&rnp->lock, flags); 848 * non-idle task or this CPU does not need another
834 return; 849 * grace period. Either way, don't start a new grace
835 } 850 * period.
836 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 851 */
852 raw_spin_unlock_irqrestore(&rnp->lock, flags);
853 return;
854 }
837 855
856 if (rsp->fqs_active) {
838 /* 857 /*
839 * Propagate new ->completed value to rcu_node structures 858 * This CPU needs a grace period, but force_quiescent_state()
840 * so that other CPUs don't have to wait until the start 859 * is running. Tell it to start one on this CPU's behalf.
841 * of the next grace period to process their callbacks.
842 */ 860 */
843 rcu_for_each_node_breadth_first(rsp, rnp) { 861 rsp->fqs_need_gp = 1;
844 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 862 raw_spin_unlock_irqrestore(&rnp->lock, flags);
845 rnp->completed = rsp->completed;
846 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
847 }
848 local_irq_restore(flags);
849 return; 863 return;
850 } 864 }
851 865
852 /* Advance to a new grace period and initialize state. */ 866 /* Advance to a new grace period and initialize state. */
853 rsp->gpnum++; 867 rsp->gpnum++;
868 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
854 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 869 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
855 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 870 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
856 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 871 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
@@ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
865 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 880 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
866 rcu_start_gp_per_cpu(rsp, rnp, rdp); 881 rcu_start_gp_per_cpu(rsp, rnp, rdp);
867 rcu_preempt_boost_start_gp(rnp); 882 rcu_preempt_boost_start_gp(rnp);
883 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
884 rnp->level, rnp->grplo,
885 rnp->grphi, rnp->qsmask);
868 raw_spin_unlock_irqrestore(&rnp->lock, flags); 886 raw_spin_unlock_irqrestore(&rnp->lock, flags);
869 return; 887 return;
870 } 888 }
@@ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
901 if (rnp == rdp->mynode) 919 if (rnp == rdp->mynode)
902 rcu_start_gp_per_cpu(rsp, rnp, rdp); 920 rcu_start_gp_per_cpu(rsp, rnp, rdp);
903 rcu_preempt_boost_start_gp(rnp); 921 rcu_preempt_boost_start_gp(rnp);
922 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
923 rnp->level, rnp->grplo,
924 rnp->grphi, rnp->qsmask);
904 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 925 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
905 } 926 }
906 927
@@ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
922 __releases(rcu_get_root(rsp)->lock) 943 __releases(rcu_get_root(rsp)->lock)
923{ 944{
924 unsigned long gp_duration; 945 unsigned long gp_duration;
946 struct rcu_node *rnp = rcu_get_root(rsp);
947 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
925 948
926 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 949 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
927 950
@@ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
933 gp_duration = jiffies - rsp->gp_start; 956 gp_duration = jiffies - rsp->gp_start;
934 if (gp_duration > rsp->gp_max) 957 if (gp_duration > rsp->gp_max)
935 rsp->gp_max = gp_duration; 958 rsp->gp_max = gp_duration;
936 rsp->completed = rsp->gpnum; 959
960 /*
961 * We know the grace period is complete, but to everyone else
962 * it appears to still be ongoing. But it is also the case
963 * that to everyone else it looks like there is nothing that
964 * they can do to advance the grace period. It is therefore
965 * safe for us to drop the lock in order to mark the grace
966 * period as completed in all of the rcu_node structures.
967 *
968 * But if this CPU needs another grace period, it will take
969 * care of this while initializing the next grace period.
970 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
971 * because the callbacks have not yet been advanced: Those
972 * callbacks are waiting on the grace period that just now
973 * completed.
974 */
975 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
976 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
977
978 /*
979 * Propagate new ->completed value to rcu_node structures
980 * so that other CPUs don't have to wait until the start
981 * of the next grace period to process their callbacks.
982 */
983 rcu_for_each_node_breadth_first(rsp, rnp) {
984 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
985 rnp->completed = rsp->gpnum;
986 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
987 }
988 rnp = rcu_get_root(rsp);
989 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
990 }
991
992 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
993 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
937 rsp->signaled = RCU_GP_IDLE; 994 rsp->signaled = RCU_GP_IDLE;
938 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 995 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
939} 996}
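The RCU_WAIT_TAIL test above leans on the segmented per-CPU callback list. The layout sketched below is an assumption drawn from the tail-pointer names used elsewhere in this file, not something introduced by this patch:

	#define RCU_DONE_TAIL		0	/* grace period ended; ready to invoke */
	#define RCU_WAIT_TAIL		1	/* waiting on the current grace period */
	#define RCU_NEXT_READY_TAIL	2	/* will wait on the next grace period */
	#define RCU_NEXT_TAIL		3	/* not yet assigned to a grace period */
	#define RCU_NEXT_SIZE		4

	/*
	 * Sketch: all callbacks sit on one singly linked list rooted at
	 * ->nxtlist, and ->nxttail[seg] points at the ->next pointer that ends
	 * segment seg.  So *rdp->nxttail[RCU_WAIT_TAIL] == NULL means nothing
	 * on this CPU was waiting on the grace period that just completed, and
	 * this CPU will not immediately start another one -- which is why it
	 * can afford to drop the root lock and propagate ->completed itself.
	 */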
@@ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
962 return; 1019 return;
963 } 1020 }
964 rnp->qsmask &= ~mask; 1021 rnp->qsmask &= ~mask;
1022 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1023 mask, rnp->qsmask, rnp->level,
1024 rnp->grplo, rnp->grphi,
1025 !!rnp->gp_tasks);
965 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 1026 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
966 1027
967 /* Other bits still set at this level, so done. */ 1028 /* Other bits still set at this level, so done. */
@@ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1000 * based on quiescent states detected in an earlier grace period! 1061 * based on quiescent states detected in an earlier grace period!
1001 */ 1062 */
1002static void 1063static void
1003rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) 1064rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp)
1004{ 1065{
1005 unsigned long flags; 1066 unsigned long flags;
1006 unsigned long mask; 1067 unsigned long mask;
@@ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
1008 1069
1009 rnp = rdp->mynode; 1070 rnp = rdp->mynode;
1010 raw_spin_lock_irqsave(&rnp->lock, flags); 1071 raw_spin_lock_irqsave(&rnp->lock, flags);
1011 if (lastcomp != rnp->completed) { 1072 if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) {
1012 1073
1013 /* 1074 /*
1014 * Someone beat us to it for this grace period, so leave. 1075 * The grace period in which this quiescent state was
1015 * The race with GP start is resolved by the fact that we 1076 * recorded has ended, so don't report it upwards.
1016 * hold the leaf rcu_node lock, so that the per-CPU bits 1077 * We will instead need a new quiescent state that lies
1017 * cannot yet be initialized -- so we would simply find our 1078 * within the current grace period.
1018 * CPU's bit already cleared in rcu_report_qs_rnp() if this
1019 * race occurred.
1020 */ 1079 */
1021 rdp->passed_quiesc = 0; /* try again later! */ 1080 rdp->passed_quiesce = 0; /* need qs for new gp. */
1022 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1081 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1023 return; 1082 return;
1024 } 1083 }
@@ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1062 * Was there a quiescent state since the beginning of the grace 1121 * Was there a quiescent state since the beginning of the grace
1063 * period? If no, then exit and wait for the next call. 1122 * period? If no, then exit and wait for the next call.
1064 */ 1123 */
1065 if (!rdp->passed_quiesc) 1124 if (!rdp->passed_quiesce)
1066 return; 1125 return;
1067 1126
1068 /* 1127 /*
1069 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 1128 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1070 * judge of that). 1129 * judge of that).
1071 */ 1130 */
1072 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); 1131 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum);
1073} 1132}
1074 1133
1075#ifdef CONFIG_HOTPLUG_CPU 1134#ifdef CONFIG_HOTPLUG_CPU
@@ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1130 if (rnp->qsmaskinit != 0) { 1189 if (rnp->qsmaskinit != 0) {
1131 if (rnp != rdp->mynode) 1190 if (rnp != rdp->mynode)
1132 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1191 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1192 else
1193 trace_rcu_grace_period(rsp->name,
1194 rnp->gpnum + 1 -
1195 !!(rnp->qsmask & mask),
1196 "cpuofl");
1133 break; 1197 break;
1134 } 1198 }
1135 if (rnp == rdp->mynode) 1199 if (rnp == rdp->mynode) {
1200 trace_rcu_grace_period(rsp->name,
1201 rnp->gpnum + 1 -
1202 !!(rnp->qsmask & mask),
1203 "cpuofl");
1136 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1204 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1137 else 1205 } else
1138 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1206 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1139 mask = rnp->grpmask; 1207 mask = rnp->grpmask;
1140 rnp = rnp->parent; 1208 rnp = rnp->parent;
@@ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1190{ 1258{
1191 unsigned long flags; 1259 unsigned long flags;
1192 struct rcu_head *next, *list, **tail; 1260 struct rcu_head *next, *list, **tail;
1193 int count; 1261 int bl, count;
1194 1262
1195 /* If no callbacks are ready, just return.*/ 1263 /* If no callbacks are ready, just return.*/
1196 if (!cpu_has_callbacks_ready_to_invoke(rdp)) 1264 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1265 trace_rcu_batch_start(rsp->name, 0, 0);
1266 trace_rcu_batch_end(rsp->name, 0);
1197 return; 1267 return;
1268 }
1198 1269
1199 /* 1270 /*
1200 * Extract the list of ready callbacks, disabling to prevent 1271 * Extract the list of ready callbacks, disabling to prevent
1201 * races with call_rcu() from interrupt handlers. 1272 * races with call_rcu() from interrupt handlers.
1202 */ 1273 */
1203 local_irq_save(flags); 1274 local_irq_save(flags);
1275 bl = rdp->blimit;
1276 trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
1204 list = rdp->nxtlist; 1277 list = rdp->nxtlist;
1205 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1278 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1206 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1279 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1216 next = list->next; 1289 next = list->next;
1217 prefetch(next); 1290 prefetch(next);
1218 debug_rcu_head_unqueue(list); 1291 debug_rcu_head_unqueue(list);
1219 __rcu_reclaim(list); 1292 __rcu_reclaim(rsp->name, list);
1220 list = next; 1293 list = next;
1221 if (++count >= rdp->blimit) 1294 if (++count >= bl)
1222 break; 1295 break;
1223 } 1296 }
1224 1297
1225 local_irq_save(flags); 1298 local_irq_save(flags);
1299 trace_rcu_batch_end(rsp->name, count);
1226 1300
1227 /* Update count, and requeue any remaining callbacks. */ 1301 /* Update count, and requeue any remaining callbacks. */
1228 rdp->qlen -= count; 1302 rdp->qlen -= count;
@@ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1250 1324
1251 local_irq_restore(flags); 1325 local_irq_restore(flags);
1252 1326
1253 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1327 /* Re-invoke RCU core processing if there are callbacks remaining. */
1254 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1328 if (cpu_has_callbacks_ready_to_invoke(rdp))
1255 invoke_rcu_core(); 1329 invoke_rcu_core();
1256} 1330}
@@ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1258/* 1332/*
1259 * Check to see if this CPU is in a non-context-switch quiescent state 1333 * Check to see if this CPU is in a non-context-switch quiescent state
1260 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1334 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1261 * Also schedule the RCU softirq handler. 1335 * Also schedule RCU core processing.
1262 * 1336 *
1263 * This function must be called with hardirqs disabled. It is normally 1337 * This function must be called with hardirqs disabled. It is normally
1264 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1338 * invoked from the scheduling-clock interrupt. If rcu_pending returns
@@ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1266 */ 1340 */
1267void rcu_check_callbacks(int cpu, int user) 1341void rcu_check_callbacks(int cpu, int user)
1268{ 1342{
1343 trace_rcu_utilization("Start scheduler-tick");
1269 if (user || 1344 if (user ||
1270 (idle_cpu(cpu) && rcu_scheduler_active && 1345 (idle_cpu(cpu) && rcu_scheduler_active &&
1271 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 1346 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
@@ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user)
1299 rcu_preempt_check_callbacks(cpu); 1374 rcu_preempt_check_callbacks(cpu);
1300 if (rcu_pending(cpu)) 1375 if (rcu_pending(cpu))
1301 invoke_rcu_core(); 1376 invoke_rcu_core();
1377 trace_rcu_utilization("End scheduler-tick");
1302} 1378}
1303 1379
1304#ifdef CONFIG_SMP 1380#ifdef CONFIG_SMP
@@ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1360 unsigned long flags; 1436 unsigned long flags;
1361 struct rcu_node *rnp = rcu_get_root(rsp); 1437 struct rcu_node *rnp = rcu_get_root(rsp);
1362 1438
1363 if (!rcu_gp_in_progress(rsp)) 1439 trace_rcu_utilization("Start fqs");
1440 if (!rcu_gp_in_progress(rsp)) {
1441 trace_rcu_utilization("End fqs");
1364 return; /* No grace period in progress, nothing to force. */ 1442 return; /* No grace period in progress, nothing to force. */
1443 }
1365 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { 1444 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1366 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1445 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1446 trace_rcu_utilization("End fqs");
1367 return; /* Someone else is already on the job. */ 1447 return; /* Someone else is already on the job. */
1368 } 1448 }
1369 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) 1449 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
@@ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1412 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ 1492 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1413 rsp->fqs_need_gp = 0; 1493 rsp->fqs_need_gp = 0;
1414 rcu_start_gp(rsp, flags); /* releases rnp->lock */ 1494 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1495 trace_rcu_utilization("End fqs");
1415 return; 1496 return;
1416 } 1497 }
1417 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1498 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1418unlock_fqs_ret: 1499unlock_fqs_ret:
1419 raw_spin_unlock_irqrestore(&rsp->fqslock, flags); 1500 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1501 trace_rcu_utilization("End fqs");
1420} 1502}
1421 1503
1422#else /* #ifdef CONFIG_SMP */ 1504#else /* #ifdef CONFIG_SMP */
@@ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1429#endif /* #else #ifdef CONFIG_SMP */ 1511#endif /* #else #ifdef CONFIG_SMP */
1430 1512
1431/* 1513/*
1432 * This does the RCU processing work from softirq context for the 1514 * This does the RCU core processing work for the specified rcu_state
1433 * specified rcu_state and rcu_data structures. This may be called 1515 * and rcu_data structures. This may be called only from the CPU to
1434 * only from the CPU to whom the rdp belongs. 1516 * whom the rdp belongs.
1435 */ 1517 */
1436static void 1518static void
1437__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1519__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1468} 1550}
1469 1551
1470/* 1552/*
1471 * Do softirq processing for the current CPU. 1553 * Do RCU core processing for the current CPU.
1472 */ 1554 */
1473static void rcu_process_callbacks(struct softirq_action *unused) 1555static void rcu_process_callbacks(struct softirq_action *unused)
1474{ 1556{
1557 trace_rcu_utilization("Start RCU core");
1475 __rcu_process_callbacks(&rcu_sched_state, 1558 __rcu_process_callbacks(&rcu_sched_state,
1476 &__get_cpu_var(rcu_sched_data)); 1559 &__get_cpu_var(rcu_sched_data));
1477 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1560 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1478 rcu_preempt_process_callbacks(); 1561 rcu_preempt_process_callbacks();
1479 1562 trace_rcu_utilization("End RCU core");
1480 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1481 rcu_needs_cpu_flush();
1482} 1563}
1483 1564
1484/* 1565/*
1485 * Wake up the current CPU's kthread. This replaces raise_softirq() 1566 * Schedule RCU callback invocation. If the specified type of RCU
1486 * in earlier versions of RCU. Note that because we are running on 1567 * does not support RCU priority boosting, just do a direct call,
1487 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task 1568 * otherwise wake up the per-CPU kernel kthread. Note that because we
1488 * cannot disappear out from under us. 1569 * are running on the current CPU with interrupts disabled, the
1570 * rcu_cpu_kthread_task cannot disappear out from under us.
1489 */ 1571 */
1490static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1572static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1491{ 1573{
@@ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1530 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1612 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1531 rdp->qlen++; 1613 rdp->qlen++;
1532 1614
1615 if (__is_kfree_rcu_offset((unsigned long)func))
1616 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
1617 rdp->qlen);
1618 else
1619 trace_rcu_callback(rsp->name, head, rdp->qlen);
1620
1533 /* If interrupts were disabled, don't dive into RCU core. */ 1621 /* If interrupts were disabled, don't dive into RCU core. */
1534 if (irqs_disabled_flags(flags)) { 1622 if (irqs_disabled_flags(flags)) {
1535 local_irq_restore(flags); 1623 local_irq_restore(flags);
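The __is_kfree_rcu_offset() test above distinguishes real callbacks from kfree_rcu() requests. The sketch below is an assumption about how rcupdate.h encodes that case (the struct and helper are invented for illustration); the idea is that the "function pointer" is really the small offset of the rcu_head within the enclosing object:

	#define __is_kfree_rcu_offset(offset)	((offset) < 4096)	/* assumed form */

	struct foo {
		int data;
		struct rcu_head rcu;		/* embedded at some small offset */
	};

	static void free_foo_later(struct foo *p)
	{
		/*
		 * kfree_rcu(p, rcu) is assumed to expand roughly to this:
		 * the offset lets the core code kfree() the whole object
		 * once a grace period has elapsed, with no real callback.
		 */
		__kfree_rcu(&p->rcu, offsetof(struct foo, rcu));
	}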
@@ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1613 */ 1701 */
1614void synchronize_sched(void) 1702void synchronize_sched(void)
1615{ 1703{
1616 struct rcu_synchronize rcu;
1617
1618 if (rcu_blocking_is_gp()) 1704 if (rcu_blocking_is_gp())
1619 return; 1705 return;
1620 1706 wait_rcu_gp(call_rcu_sched);
1621 init_rcu_head_on_stack(&rcu.head);
1622 init_completion(&rcu.completion);
1623 /* Will wake me after RCU finished. */
1624 call_rcu_sched(&rcu.head, wakeme_after_rcu);
1625 /* Wait for it. */
1626 wait_for_completion(&rcu.completion);
1627 destroy_rcu_head_on_stack(&rcu.head);
1628} 1707}
1629EXPORT_SYMBOL_GPL(synchronize_sched); 1708EXPORT_SYMBOL_GPL(synchronize_sched);
1630 1709
@@ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
1639 */ 1718 */
1640void synchronize_rcu_bh(void) 1719void synchronize_rcu_bh(void)
1641{ 1720{
1642 struct rcu_synchronize rcu;
1643
1644 if (rcu_blocking_is_gp()) 1721 if (rcu_blocking_is_gp())
1645 return; 1722 return;
1646 1723 wait_rcu_gp(call_rcu_bh);
1647 init_rcu_head_on_stack(&rcu.head);
1648 init_completion(&rcu.completion);
1649 /* Will wake me after RCU finished. */
1650 call_rcu_bh(&rcu.head, wakeme_after_rcu);
1651 /* Wait for it. */
1652 wait_for_completion(&rcu.completion);
1653 destroy_rcu_head_on_stack(&rcu.head);
1654} 1724}
1655EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1725EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1656 1726
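Both synchronize_sched() and synchronize_rcu_bh() now delegate to wait_rcu_gp(), which factors out the open-coded wait deleted above. Reconstructed from those removed lines, the helper (which the diffstat places in kernel/rcupdate.c) presumably looks about like this; the typedef is an assumption:

	typedef void (*call_rcu_func_t)(struct rcu_head *head,
					void (*func)(struct rcu_head *head));

	void wait_rcu_gp(call_rcu_func_t crf)
	{
		struct rcu_synchronize rcu;

		init_rcu_head_on_stack(&rcu.head);
		init_completion(&rcu.completion);
		crf(&rcu.head, wakeme_after_rcu);	/* will wake us after a grace period */
		wait_for_completion(&rcu.completion);	/* wait for it */
		destroy_rcu_head_on_stack(&rcu.head);
	}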
@@ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1671 check_cpu_stall(rsp, rdp); 1741 check_cpu_stall(rsp, rdp);
1672 1742
1673 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1743 /* Is the RCU core waiting for a quiescent state from this CPU? */
1674 if (rdp->qs_pending && !rdp->passed_quiesc) { 1744 if (rcu_scheduler_fully_active &&
1745 rdp->qs_pending && !rdp->passed_quiesce) {
1675 1746
1676 /* 1747 /*
1677 * If force_quiescent_state() coming soon and this CPU 1748 * If force_quiescent_state() coming soon and this CPU
@@ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1683 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1754 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1684 jiffies)) 1755 jiffies))
1685 set_need_resched(); 1756 set_need_resched();
1686 } else if (rdp->qs_pending && rdp->passed_quiesc) { 1757 } else if (rdp->qs_pending && rdp->passed_quiesce) {
1687 rdp->n_rp_report_qs++; 1758 rdp->n_rp_report_qs++;
1688 return 1; 1759 return 1;
1689 } 1760 }
@@ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1846 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 1917 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1847#endif /* #ifdef CONFIG_NO_HZ */ 1918#endif /* #ifdef CONFIG_NO_HZ */
1848 rdp->cpu = cpu; 1919 rdp->cpu = cpu;
1920 rdp->rsp = rsp;
1849 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1921 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1850} 1922}
1851 1923
@@ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1865 1937
1866 /* Set up local state, ensuring consistent view of global state. */ 1938 /* Set up local state, ensuring consistent view of global state. */
1867 raw_spin_lock_irqsave(&rnp->lock, flags); 1939 raw_spin_lock_irqsave(&rnp->lock, flags);
1868 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1869 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1870 rdp->beenonline = 1; /* We have now been online. */ 1940 rdp->beenonline = 1; /* We have now been online. */
1871 rdp->preemptible = preemptible; 1941 rdp->preemptible = preemptible;
1872 rdp->qlen_last_fqs_check = 0; 1942 rdp->qlen_last_fqs_check = 0;
@@ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1891 rnp->qsmaskinit |= mask; 1961 rnp->qsmaskinit |= mask;
1892 mask = rnp->grpmask; 1962 mask = rnp->grpmask;
1893 if (rnp == rdp->mynode) { 1963 if (rnp == rdp->mynode) {
1894 rdp->gpnum = rnp->completed; /* if GP in progress... */ 1964 /*
1965 * If there is a grace period in progress, we will
1966 * set up to wait for it next time we run the
1967 * RCU core code.
1968 */
1969 rdp->gpnum = rnp->completed;
1895 rdp->completed = rnp->completed; 1970 rdp->completed = rnp->completed;
1896 rdp->passed_quiesc_completed = rnp->completed - 1; 1971 rdp->passed_quiesce = 0;
1972 rdp->qs_pending = 0;
1973 rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
1974 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
1897 } 1975 }
1898 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 1976 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
1899 rnp = rnp->parent; 1977 rnp = rnp->parent;
@@ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1919 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1997 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1920 struct rcu_node *rnp = rdp->mynode; 1998 struct rcu_node *rnp = rdp->mynode;
1921 1999
2000 trace_rcu_utilization("Start CPU hotplug");
1922 switch (action) { 2001 switch (action) {
1923 case CPU_UP_PREPARE: 2002 case CPU_UP_PREPARE:
1924 case CPU_UP_PREPARE_FROZEN: 2003 case CPU_UP_PREPARE_FROZEN:
@@ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1954 default: 2033 default:
1955 break; 2034 break;
1956 } 2035 }
2036 trace_rcu_utilization("End CPU hotplug");
1957 return NOTIFY_OK; 2037 return NOTIFY_OK;
1958} 2038}
1959 2039
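The paired "Start ..."/"End ..." strings passed to trace_rcu_utilization() throughout this file let a tracer measure how long each CPU spends inside RCU core work, the scheduler-tick hook, fqs, and CPU hotplug. A sketch of what that tracepoint is assumed to look like in include/trace/events/rcu.h:

	TRACE_EVENT(rcu_utilization,
		TP_PROTO(char *s),
		TP_ARGS(s),
		TP_STRUCT__entry(
			__field(char *, s)
		),
		TP_fast_assign(
			__entry->s = s;
		),
		TP_printk("%s", __entry->s)
	);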
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 01b2ccda26fb..849ce9ec51fe 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -230,9 +230,9 @@ struct rcu_data {
230 /* in order to detect GP end. */ 230 /* in order to detect GP end. */
231 unsigned long gpnum; /* Highest gp number that this CPU */ 231 unsigned long gpnum; /* Highest gp number that this CPU */
232 /* is aware of having started. */ 232 /* is aware of having started. */
233 unsigned long passed_quiesc_completed; 233 unsigned long passed_quiesce_gpnum;
234 /* Value of completed at time of qs. */ 234 /* gpnum at time of quiescent state. */
235 bool passed_quiesc; /* User-mode/idle loop etc. */ 235 bool passed_quiesce; /* User-mode/idle loop etc. */
236 bool qs_pending; /* Core waits for quiesc state. */ 236 bool qs_pending; /* Core waits for quiesc state. */
237 bool beenonline; /* CPU online at least once. */ 237 bool beenonline; /* CPU online at least once. */
238 bool preemptible; /* Preemptible RCU? */ 238 bool preemptible; /* Preemptible RCU? */
@@ -299,6 +299,7 @@ struct rcu_data {
299 unsigned long n_rp_need_nothing; 299 unsigned long n_rp_need_nothing;
300 300
301 int cpu; 301 int cpu;
302 struct rcu_state *rsp;
302}; 303};
303 304
304/* Values for signaled field in struct rcu_state. */ 305/* Values for signaled field in struct rcu_state. */
@@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state;
417DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 418DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
418#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 419#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
419 420
421#ifdef CONFIG_RCU_BOOST
422DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
423DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
424DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
425DECLARE_PER_CPU(char, rcu_cpu_has_work);
426#endif /* #ifdef CONFIG_RCU_BOOST */
427
420#ifndef RCU_TREE_NONCORE 428#ifndef RCU_TREE_NONCORE
421 429
422/* Forward declarations for rcutree_plugin.h */ 430/* Forward declarations for rcutree_plugin.h */
@@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
430static void rcu_stop_cpu_kthread(int cpu); 438static void rcu_stop_cpu_kthread(int cpu);
431#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 439#endif /* #ifdef CONFIG_HOTPLUG_CPU */
432static void rcu_print_detail_task_stall(struct rcu_state *rsp); 440static void rcu_print_detail_task_stall(struct rcu_state *rsp);
433static void rcu_print_task_stall(struct rcu_node *rnp); 441static int rcu_print_task_stall(struct rcu_node *rnp);
434static void rcu_preempt_stall_reset(void); 442static void rcu_preempt_stall_reset(void);
435static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 443static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
436#ifdef CONFIG_HOTPLUG_CPU 444#ifdef CONFIG_HOTPLUG_CPU
@@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu);
450static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 458static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
451static void rcu_preempt_send_cbs_to_online(void); 459static void rcu_preempt_send_cbs_to_online(void);
452static void __init __rcu_init_preempt(void); 460static void __init __rcu_init_preempt(void);
453static void rcu_needs_cpu_flush(void);
454static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 461static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
455static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 462static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
456static void invoke_rcu_callbacks_kthread(void); 463static void invoke_rcu_callbacks_kthread(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 8aafbb80b8b0..4b9b9f8a4184 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -27,6 +27,14 @@
27#include <linux/delay.h> 27#include <linux/delay.h>
28#include <linux/stop_machine.h> 28#include <linux/stop_machine.h>
29 29
30#define RCU_KTHREAD_PRIO 1
31
32#ifdef CONFIG_RCU_BOOST
33#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
34#else
35#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
36#endif
37
30/* 38/*
31 * Check the RCU kernel configuration parameters and print informative 39 * Check the RCU kernel configuration parameters and print informative
32 * messages about anything out of the ordinary. If you like #ifdef, you 40 * messages about anything out of the ordinary. If you like #ifdef, you
@@ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void)
64 72
65#ifdef CONFIG_TREE_PREEMPT_RCU 73#ifdef CONFIG_TREE_PREEMPT_RCU
66 74
67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 75struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 76DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69static struct rcu_state *rcu_state = &rcu_preempt_state; 77static struct rcu_state *rcu_state = &rcu_preempt_state;
70 78
@@ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu)
122{ 130{
123 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 131 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
124 132
125 rdp->passed_quiesc_completed = rdp->gpnum - 1; 133 rdp->passed_quiesce_gpnum = rdp->gpnum;
126 barrier(); 134 barrier();
127 rdp->passed_quiesc = 1; 135 if (rdp->passed_quiesce == 0)
136 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
137 rdp->passed_quiesce = 1;
128 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 138 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
129} 139}
130 140
@@ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu)
190 if (rnp->qsmask & rdp->grpmask) 200 if (rnp->qsmask & rdp->grpmask)
191 rnp->gp_tasks = &t->rcu_node_entry; 201 rnp->gp_tasks = &t->rcu_node_entry;
192 } 202 }
203 trace_rcu_preempt_task(rdp->rsp->name,
204 t->pid,
205 (rnp->qsmask & rdp->grpmask)
206 ? rnp->gpnum
207 : rnp->gpnum + 1);
193 raw_spin_unlock_irqrestore(&rnp->lock, flags); 208 raw_spin_unlock_irqrestore(&rnp->lock, flags);
194 } else if (t->rcu_read_lock_nesting < 0 && 209 } else if (t->rcu_read_lock_nesting < 0 &&
195 t->rcu_read_unlock_special) { 210 t->rcu_read_unlock_special) {
@@ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
299 int empty_exp; 314 int empty_exp;
300 unsigned long flags; 315 unsigned long flags;
301 struct list_head *np; 316 struct list_head *np;
317#ifdef CONFIG_RCU_BOOST
318 struct rt_mutex *rbmp = NULL;
319#endif /* #ifdef CONFIG_RCU_BOOST */
302 struct rcu_node *rnp; 320 struct rcu_node *rnp;
303 int special; 321 int special;
304 322
@@ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
344 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 362 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
345 np = rcu_next_node_entry(t, rnp); 363 np = rcu_next_node_entry(t, rnp);
346 list_del_init(&t->rcu_node_entry); 364 list_del_init(&t->rcu_node_entry);
365 t->rcu_blocked_node = NULL;
366 trace_rcu_unlock_preempted_task("rcu_preempt",
367 rnp->gpnum, t->pid);
347 if (&t->rcu_node_entry == rnp->gp_tasks) 368 if (&t->rcu_node_entry == rnp->gp_tasks)
348 rnp->gp_tasks = np; 369 rnp->gp_tasks = np;
349 if (&t->rcu_node_entry == rnp->exp_tasks) 370 if (&t->rcu_node_entry == rnp->exp_tasks)
@@ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
351#ifdef CONFIG_RCU_BOOST 372#ifdef CONFIG_RCU_BOOST
352 if (&t->rcu_node_entry == rnp->boost_tasks) 373 if (&t->rcu_node_entry == rnp->boost_tasks)
353 rnp->boost_tasks = np; 374 rnp->boost_tasks = np;
354 /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ 375 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
355 if (t->rcu_boosted) { 376 if (t->rcu_boost_mutex) {
356 special |= RCU_READ_UNLOCK_BOOSTED; 377 rbmp = t->rcu_boost_mutex;
357 t->rcu_boosted = 0; 378 t->rcu_boost_mutex = NULL;
358 } 379 }
359#endif /* #ifdef CONFIG_RCU_BOOST */ 380#endif /* #ifdef CONFIG_RCU_BOOST */
360 t->rcu_blocked_node = NULL;
361 381
362 /* 382 /*
363 * If this was the last task on the current list, and if 383 * If this was the last task on the current list, and if
364 * we aren't waiting on any CPUs, report the quiescent state. 384 * we aren't waiting on any CPUs, report the quiescent state.
365 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. 385 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
366 */ 386 */
367 if (empty) 387 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
368 raw_spin_unlock_irqrestore(&rnp->lock, flags); 388 trace_rcu_quiescent_state_report("preempt_rcu",
369 else 389 rnp->gpnum,
390 0, rnp->qsmask,
391 rnp->level,
392 rnp->grplo,
393 rnp->grphi,
394 !!rnp->gp_tasks);
370 rcu_report_unblock_qs_rnp(rnp, flags); 395 rcu_report_unblock_qs_rnp(rnp, flags);
396 } else
397 raw_spin_unlock_irqrestore(&rnp->lock, flags);
371 398
372#ifdef CONFIG_RCU_BOOST 399#ifdef CONFIG_RCU_BOOST
373 /* Unboost if we were boosted. */ 400 /* Unboost if we were boosted. */
374 if (special & RCU_READ_UNLOCK_BOOSTED) { 401 if (rbmp)
375 rt_mutex_unlock(t->rcu_boost_mutex); 402 rt_mutex_unlock(rbmp);
376 t->rcu_boost_mutex = NULL;
377 }
378#endif /* #ifdef CONFIG_RCU_BOOST */ 403#endif /* #ifdef CONFIG_RCU_BOOST */
379 404
380 /* 405 /*
@@ -399,10 +424,10 @@ void __rcu_read_unlock(void)
399{ 424{
400 struct task_struct *t = current; 425 struct task_struct *t = current;
401 426
402 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
403 if (t->rcu_read_lock_nesting != 1) 427 if (t->rcu_read_lock_nesting != 1)
404 --t->rcu_read_lock_nesting; 428 --t->rcu_read_lock_nesting;
405 else { 429 else {
430 barrier(); /* critical section before exit code. */
406 t->rcu_read_lock_nesting = INT_MIN; 431 t->rcu_read_lock_nesting = INT_MIN;
407 barrier(); /* assign before ->rcu_read_unlock_special load */ 432 barrier(); /* assign before ->rcu_read_unlock_special load */
408 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 433 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
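Moving the barrier() inside the else confines it to the outermost unlock, where it keeps the compiler from sinking the critical section's memory accesses past the point at which ->rcu_read_lock_nesting is poked to INT_MIN and rcu_read_unlock_special() may run. A sketch of the whole function after this change; the tail (the final barrier and the reset to zero) is filled in from context and is an assumption:

	void __rcu_read_unlock(void)
	{
		struct task_struct *t = current;

		if (t->rcu_read_lock_nesting != 1) {
			--t->rcu_read_lock_nesting;	/* still nested: cheap path */
		} else {
			barrier();	/* critical section before exit code */
			t->rcu_read_lock_nesting = INT_MIN;
			barrier();	/* assign before ->rcu_read_unlock_special load */
			if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
				rcu_read_unlock_special(t);
			barrier();	/* special handling before the final assign */
			t->rcu_read_lock_nesting = 0;	/* outermost unlock complete */
		}
	}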
@@ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
466 * Scan the current list of tasks blocked within RCU read-side critical 491 * Scan the current list of tasks blocked within RCU read-side critical
467 * sections, printing out the tid of each. 492 * sections, printing out the tid of each.
468 */ 493 */
469static void rcu_print_task_stall(struct rcu_node *rnp) 494static int rcu_print_task_stall(struct rcu_node *rnp)
470{ 495{
471 struct task_struct *t; 496 struct task_struct *t;
497 int ndetected = 0;
472 498
473 if (!rcu_preempt_blocked_readers_cgp(rnp)) 499 if (!rcu_preempt_blocked_readers_cgp(rnp))
474 return; 500 return 0;
475 t = list_entry(rnp->gp_tasks, 501 t = list_entry(rnp->gp_tasks,
476 struct task_struct, rcu_node_entry); 502 struct task_struct, rcu_node_entry);
477 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 503 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
478 printk(" P%d", t->pid); 504 printk(" P%d", t->pid);
505 ndetected++;
506 }
507 return ndetected;
479} 508}
480 509
481/* 510/*
@@ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu);
656 */ 685 */
657void synchronize_rcu(void) 686void synchronize_rcu(void)
658{ 687{
659 struct rcu_synchronize rcu;
660
661 if (!rcu_scheduler_active) 688 if (!rcu_scheduler_active)
662 return; 689 return;
663 690 wait_rcu_gp(call_rcu);
664 init_rcu_head_on_stack(&rcu.head);
665 init_completion(&rcu.completion);
666 /* Will wake me after RCU finished. */
667 call_rcu(&rcu.head, wakeme_after_rcu);
668 /* Wait for it. */
669 wait_for_completion(&rcu.completion);
670 destroy_rcu_head_on_stack(&rcu.head);
671} 691}
672EXPORT_SYMBOL_GPL(synchronize_rcu); 692EXPORT_SYMBOL_GPL(synchronize_rcu);
673 693
@@ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
968 * Because preemptible RCU does not exist, we never have to check for 988 * Because preemptible RCU does not exist, we never have to check for
969 * tasks blocked within RCU read-side critical sections. 989 * tasks blocked within RCU read-side critical sections.
970 */ 990 */
971static void rcu_print_task_stall(struct rcu_node *rnp) 991static int rcu_print_task_stall(struct rcu_node *rnp)
972{ 992{
993 return 0;
973} 994}
974 995
975/* 996/*
@@ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1136 1157
1137#endif /* #else #ifdef CONFIG_RCU_TRACE */ 1158#endif /* #else #ifdef CONFIG_RCU_TRACE */
1138 1159
1160static struct lock_class_key rcu_boost_class;
1161
1139/* 1162/*
1140 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1163 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1141 * or ->boost_tasks, advancing the pointer to the next task in the 1164 * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp)
1198 */ 1221 */
1199 t = container_of(tb, struct task_struct, rcu_node_entry); 1222 t = container_of(tb, struct task_struct, rcu_node_entry);
1200 rt_mutex_init_proxy_locked(&mtx, t); 1223 rt_mutex_init_proxy_locked(&mtx, t);
1224 /* Avoid lockdep false positives. This rt_mutex is its own thing. */
1225 lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
1226 "rcu_boost_mutex");
1201 t->rcu_boost_mutex = &mtx; 1227 t->rcu_boost_mutex = &mtx;
1202 t->rcu_boosted = 1;
1203 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1228 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1204 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1229 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1205 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1230 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
@@ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg)
1228 int spincnt = 0; 1253 int spincnt = 0;
1229 int more2boost; 1254 int more2boost;
1230 1255
1256 trace_rcu_utilization("Start boost kthread@init");
1231 for (;;) { 1257 for (;;) {
1232 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1258 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1259 trace_rcu_utilization("End boost kthread@rcu_wait");
1233 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1260 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1261 trace_rcu_utilization("Start boost kthread@rcu_wait");
1234 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1262 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1235 more2boost = rcu_boost(rnp); 1263 more2boost = rcu_boost(rnp);
1236 if (more2boost) 1264 if (more2boost)
@@ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg)
1238 else 1266 else
1239 spincnt = 0; 1267 spincnt = 0;
1240 if (spincnt > 10) { 1268 if (spincnt > 10) {
1269 trace_rcu_utilization("End boost kthread@rcu_yield");
1241 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1270 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1271 trace_rcu_utilization("Start boost kthread@rcu_yield");
1242 spincnt = 0; 1272 spincnt = 0;
1243 } 1273 }
1244 } 1274 }
1245 /* NOTREACHED */ 1275 /* NOTREACHED */
1276 trace_rcu_utilization("End boost kthread@notreached");
1246 return 0; 1277 return 0;
1247} 1278}
1248 1279
@@ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void)
1291 1322
1292 local_irq_save(flags); 1323 local_irq_save(flags);
1293 __this_cpu_write(rcu_cpu_has_work, 1); 1324 __this_cpu_write(rcu_cpu_has_work, 1);
1294 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { 1325 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1295 local_irq_restore(flags); 1326 current != __this_cpu_read(rcu_cpu_kthread_task))
1296 return; 1327 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1297 }
1298 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1299 local_irq_restore(flags); 1328 local_irq_restore(flags);
1300} 1329}
1301 1330
@@ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1343 if (rnp->boost_kthread_task != NULL) 1372 if (rnp->boost_kthread_task != NULL)
1344 return 0; 1373 return 0;
1345 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1374 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1346 "rcub%d", rnp_index); 1375 "rcub/%d", rnp_index);
1347 if (IS_ERR(t)) 1376 if (IS_ERR(t))
1348 return PTR_ERR(t); 1377 return PTR_ERR(t);
1349 raw_spin_lock_irqsave(&rnp->lock, flags); 1378 raw_spin_lock_irqsave(&rnp->lock, flags);
1350 rnp->boost_kthread_task = t; 1379 rnp->boost_kthread_task = t;
1351 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1380 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1352 sp.sched_priority = RCU_KTHREAD_PRIO; 1381 sp.sched_priority = RCU_BOOST_PRIO;
1353 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1382 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1354 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1383 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1355 return 0; 1384 return 0;
@@ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1444{ 1473{
1445 struct sched_param sp; 1474 struct sched_param sp;
1446 struct timer_list yield_timer; 1475 struct timer_list yield_timer;
1476 int prio = current->rt_priority;
1447 1477
1448 setup_timer_on_stack(&yield_timer, f, arg); 1478 setup_timer_on_stack(&yield_timer, f, arg);
1449 mod_timer(&yield_timer, jiffies + 2); 1479 mod_timer(&yield_timer, jiffies + 2);
@@ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1451 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); 1481 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1452 set_user_nice(current, 19); 1482 set_user_nice(current, 19);
1453 schedule(); 1483 schedule();
1454 sp.sched_priority = RCU_KTHREAD_PRIO; 1484 set_user_nice(current, 0);
1485 sp.sched_priority = prio;
1455 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1486 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1456 del_timer(&yield_timer); 1487 del_timer(&yield_timer);
1457} 1488}
@@ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu)
1489 1520
1490/* 1521/*
1491 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1522 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1492 * earlier RCU softirq. 1523 * RCU softirq used in flavors and configurations of RCU that do not
1524 * support RCU priority boosting.
1493 */ 1525 */
1494static int rcu_cpu_kthread(void *arg) 1526static int rcu_cpu_kthread(void *arg)
1495{ 1527{
@@ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg)
1500 char work; 1532 char work;
1501 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1533 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1502 1534
1535 trace_rcu_utilization("Start CPU kthread@init");
1503 for (;;) { 1536 for (;;) {
1504 *statusp = RCU_KTHREAD_WAITING; 1537 *statusp = RCU_KTHREAD_WAITING;
1538 trace_rcu_utilization("End CPU kthread@rcu_wait");
1505 rcu_wait(*workp != 0 || kthread_should_stop()); 1539 rcu_wait(*workp != 0 || kthread_should_stop());
1540 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1506 local_bh_disable(); 1541 local_bh_disable();
1507 if (rcu_cpu_kthread_should_stop(cpu)) { 1542 if (rcu_cpu_kthread_should_stop(cpu)) {
1508 local_bh_enable(); 1543 local_bh_enable();
@@ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg)
1523 spincnt = 0; 1558 spincnt = 0;
1524 if (spincnt > 10) { 1559 if (spincnt > 10) {
1525 *statusp = RCU_KTHREAD_YIELDING; 1560 *statusp = RCU_KTHREAD_YIELDING;
1561 trace_rcu_utilization("End CPU kthread@rcu_yield");
1526 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); 1562 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1563 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1527 spincnt = 0; 1564 spincnt = 0;
1528 } 1565 }
1529 } 1566 }
1530 *statusp = RCU_KTHREAD_STOPPED; 1567 *statusp = RCU_KTHREAD_STOPPED;
1568 trace_rcu_utilization("End CPU kthread@term");
1531 return 0; 1569 return 0;
1532} 1570}
1533 1571
@@ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1560 if (!rcu_scheduler_fully_active || 1598 if (!rcu_scheduler_fully_active ||
1561 per_cpu(rcu_cpu_kthread_task, cpu) != NULL) 1599 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1562 return 0; 1600 return 0;
1563 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); 1601 t = kthread_create_on_node(rcu_cpu_kthread,
1602 (void *)(long)cpu,
1603 cpu_to_node(cpu),
1604 "rcuc/%d", cpu);
1564 if (IS_ERR(t)) 1605 if (IS_ERR(t))
1565 return PTR_ERR(t); 1606 return PTR_ERR(t);
1566 if (cpu_online(cpu)) 1607 if (cpu_online(cpu))
@@ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1669 return 0; 1710 return 0;
1670 if (rnp->node_kthread_task == NULL) { 1711 if (rnp->node_kthread_task == NULL) {
1671 t = kthread_create(rcu_node_kthread, (void *)rnp, 1712 t = kthread_create(rcu_node_kthread, (void *)rnp,
1672 "rcun%d", rnp_index); 1713 "rcun/%d", rnp_index);
1673 if (IS_ERR(t)) 1714 if (IS_ERR(t))
1674 return PTR_ERR(t); 1715 return PTR_ERR(t);
1675 raw_spin_lock_irqsave(&rnp->lock, flags); 1716 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu)
1907 return rcu_needs_cpu_quick_check(cpu); 1948 return rcu_needs_cpu_quick_check(cpu);
1908} 1949}
1909 1950
1910/*
1911 * Check to see if we need to continue a callback-flush operations to
1912 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
1913 * entry is not configured, so we never do need to.
1914 */
1915static void rcu_needs_cpu_flush(void)
1916{
1917}
1918
1919#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1951#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1920 1952
1921#define RCU_NEEDS_CPU_FLUSHES 5 1953#define RCU_NEEDS_CPU_FLUSHES 5
@@ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu)
1991 return c; 2023 return c;
1992} 2024}
1993 2025
1994/*
1995 * Check to see if we need to continue a callback-flush operations to
1996 * allow the last CPU to enter dyntick-idle mode.
1997 */
1998static void rcu_needs_cpu_flush(void)
1999{
2000 int cpu = smp_processor_id();
2001 unsigned long flags;
2002
2003 if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
2004 return;
2005 local_irq_save(flags);
2006 (void)rcu_needs_cpu(cpu);
2007 local_irq_restore(flags);
2008}
2009
2010#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2026#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 3b0c0986afc0..9feffa4c0695 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -48,11 +48,6 @@
48 48
49#ifdef CONFIG_RCU_BOOST 49#ifdef CONFIG_RCU_BOOST
50 50
51DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
52DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
53DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
54DECLARE_PER_CPU(char, rcu_cpu_has_work);
55
56static char convert_kthread_status(unsigned int kthread_status) 51static char convert_kthread_status(unsigned int kthread_status)
57{ 52{
58 if (kthread_status > RCU_KTHREAD_MAX) 53 if (kthread_status > RCU_KTHREAD_MAX)
@@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
66{ 61{
67 if (!rdp->beenonline) 62 if (!rdp->beenonline)
68 return; 63 return;
69 seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", 64 seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d",
70 rdp->cpu, 65 rdp->cpu,
71 cpu_is_offline(rdp->cpu) ? '!' : ' ', 66 cpu_is_offline(rdp->cpu) ? '!' : ' ',
72 rdp->completed, rdp->gpnum, 67 rdp->completed, rdp->gpnum,
73 rdp->passed_quiesc, rdp->passed_quiesc_completed, 68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
74 rdp->qs_pending); 69 rdp->qs_pending);
75#ifdef CONFIG_NO_HZ 70#ifdef CONFIG_NO_HZ
76 seq_printf(m, " dt=%d/%d/%d df=%lu", 71 seq_printf(m, " dt=%d/%d/%d df=%lu",
@@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
144 rdp->cpu, 139 rdp->cpu,
145 cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", 140 cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
146 rdp->completed, rdp->gpnum, 141 rdp->completed, rdp->gpnum,
147 rdp->passed_quiesc, rdp->passed_quiesc_completed, 142 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
148 rdp->qs_pending); 143 rdp->qs_pending);
149#ifdef CONFIG_NO_HZ 144#ifdef CONFIG_NO_HZ
150 seq_printf(m, ",%d,%d,%d,%lu", 145 seq_printf(m, ",%d,%d,%d,%lu",
@@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
175 170
176static int show_rcudata_csv(struct seq_file *m, void *unused) 171static int show_rcudata_csv(struct seq_file *m, void *unused)
177{ 172{
178 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); 173 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
179#ifdef CONFIG_NO_HZ 174#ifdef CONFIG_NO_HZ
180 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 175 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
181#endif /* #ifdef CONFIG_NO_HZ */ 176#endif /* #ifdef CONFIG_NO_HZ */
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 255e1662acdb..5e8d9cce7470 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
579 struct rt_mutex_waiter *waiter) 579 struct rt_mutex_waiter *waiter)
580{ 580{
581 int ret = 0; 581 int ret = 0;
582 int was_disabled;
582 583
583 for (;;) { 584 for (;;) {
584 /* Try to acquire the lock: */ 585 /* Try to acquire the lock: */
@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
601 602
602 raw_spin_unlock(&lock->wait_lock); 603 raw_spin_unlock(&lock->wait_lock);
603 604
605 was_disabled = irqs_disabled();
606 if (was_disabled)
607 local_irq_enable();
608
604 debug_rt_mutex_print_deadlock(waiter); 609 debug_rt_mutex_print_deadlock(waiter);
605 610
606 schedule_rt_mutex(lock); 611 schedule_rt_mutex(lock);
607 612
613 if (was_disabled)
614 local_irq_disable();
615
608 raw_spin_lock(&lock->wait_lock); 616 raw_spin_lock(&lock->wait_lock);
609 set_current_state(state); 617 set_current_state(state);
610 } 618 }
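This change lets rt_mutex_unlock() be called with interrupts disabled (as the RCU unboost path now may do) by re-enabling interrupts only around the schedule() that blocks, then restoring the caller's state. The general shape, as a sketch with an invented helper name:

	/*
	 * Sketch of the pattern: remember whether the caller had interrupts
	 * disabled, enable them while blocked on the lock, and restore the
	 * caller's irq state before retaking the wait_lock.
	 */
	static void block_with_irqs_enabled(struct rt_mutex *lock)
	{
		int was_disabled = irqs_disabled();

		raw_spin_unlock(&lock->wait_lock);
		if (was_disabled)
			local_irq_enable();	/* take interrupts while sleeping */

		schedule_rt_mutex(lock);	/* sleep until woken by the lock owner */

		if (was_disabled)
			local_irq_disable();	/* restore the caller's irq state */
		raw_spin_lock(&lock->wait_lock);
	}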
diff --git a/kernel/sched.c b/kernel/sched.c
index 8aa00803c1ec..03ad0113801a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4213,6 +4213,7 @@ static inline void schedule_debug(struct task_struct *prev)
4213 */ 4213 */
4214 if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) 4214 if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
4215 __schedule_bug(prev); 4215 __schedule_bug(prev);
4216 rcu_sleep_check();
4216 4217
4217 profile_hit(SCHED_PROFILING, __builtin_return_address(0)); 4218 profile_hit(SCHED_PROFILING, __builtin_return_address(0));
4218 4219
@@ -5955,15 +5956,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5955} 5956}
5956 5957
5957/* 5958/*
5958 * In a system that switches off the HZ timer nohz_cpu_mask
5959 * indicates which cpus entered this state. This is used
5960 * in the rcu update to wait only for active cpus. For system
5961 * which do not switch off the HZ timer nohz_cpu_mask should
5962 * always be CPU_BITS_NONE.
5963 */
5964cpumask_var_t nohz_cpu_mask;
5965
5966/*
5967 * Increase the granularity value when there are more CPUs, 5959 * Increase the granularity value when there are more CPUs,
5968 * because with more CPUs the 'effective latency' as visible 5960 * because with more CPUs the 'effective latency' as visible
5969 * to users decreases. But the relationship is not linear, 5961 * to users decreases. But the relationship is not linear,
@@ -8175,8 +8167,6 @@ void __init sched_init(void)
8175 */ 8167 */
8176 current->sched_class = &fair_sched_class; 8168 current->sched_class = &fair_sched_class;
8177 8169
8178 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
8179 zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
8180#ifdef CONFIG_SMP 8170#ifdef CONFIG_SMP
8181 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); 8171 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
8182#ifdef CONFIG_NO_HZ 8172#ifdef CONFIG_NO_HZ
@@ -8206,6 +8196,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
8206{ 8196{
8207 static unsigned long prev_jiffy; /* ratelimiting */ 8197 static unsigned long prev_jiffy; /* ratelimiting */
8208 8198
8199 rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
8209 if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || 8200 if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
8210 system_state != SYSTEM_RUNNING || oops_in_progress) 8201 system_state != SYSTEM_RUNNING || oops_in_progress)
8211 return; 8202 return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d5097c44b407..eb98e55196b9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now)
139 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 139 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
140 unsigned long flags; 140 unsigned long flags;
141 141
142 cpumask_clear_cpu(cpu, nohz_cpu_mask);
143 ts->idle_waketime = now; 142 ts->idle_waketime = now;
144 143
145 local_irq_save(flags); 144 local_irq_save(flags);
@@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle)
389 else 388 else
390 expires.tv64 = KTIME_MAX; 389 expires.tv64 = KTIME_MAX;
391 390
392 if (delta_jiffies > 1)
393 cpumask_set_cpu(cpu, nohz_cpu_mask);
394
395 /* Skip reprogram of event if its not changed */ 391 /* Skip reprogram of event if its not changed */
396 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 392 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
397 goto out; 393 goto out;
@@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle)
441 * softirq. 437 * softirq.
442 */ 438 */
443 tick_do_update_jiffies64(ktime_get()); 439 tick_do_update_jiffies64(ktime_get());
444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
445 } 440 }
446 raise_softirq_irqoff(TIMER_SOFTIRQ); 441 raise_softirq_irqoff(TIMER_SOFTIRQ);
447out: 442out:
@@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void)
524 /* Update jiffies first */ 519 /* Update jiffies first */
525 select_nohz_load_balancer(0); 520 select_nohz_load_balancer(0);
526 tick_do_update_jiffies64(now); 521 tick_do_update_jiffies64(now);
527 cpumask_clear_cpu(cpu, nohz_cpu_mask);
528 522
529#ifndef CONFIG_VIRT_CPU_ACCOUNTING 523#ifndef CONFIG_VIRT_CPU_ACCOUNTING
530 /* 524 /*