| author | Ingo Molnar <mingo@elte.hu> | 2010-12-23 06:57:04 -0500 |
| committer | Ingo Molnar <mingo@elte.hu> | 2010-12-23 06:57:04 -0500 |
| commit | 394f4528c523d88daabd50f883a8d6b164075555 (patch) |
| tree | b45a5b87a1ba9be8afe71f1db1537ff19e2b05d8 /kernel |
| parent | 90a8a73c06cc32b609a880d48449d7083327e11a (diff) |
| parent | 3c2dcf2aed5ea22ecf65a9a871c4963faec421b3 (diff) |
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-2.6-rcu into core/rcu
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/rcutiny.c | 105 |
| -rw-r--r-- | kernel/rcutiny_plugin.h | 433 |
| -rw-r--r-- | kernel/rcutorture.c | 270 |
| -rw-r--r-- | kernel/rcutree.c | 156 |
| -rw-r--r-- | kernel/rcutree.h | 61 |
| -rw-r--r-- | kernel/rcutree_plugin.h | 135 |
| -rw-r--r-- | kernel/rcutree_trace.c | 12 |
| -rw-r--r-- | kernel/sched.c | 69 |
| -rw-r--r-- | kernel/srcu.c | 8 |
9 files changed, 996 insertions, 253 deletions
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d806735342ac..034493724749 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
| @@ -36,31 +36,16 @@ | |||
| 36 | #include <linux/time.h> | 36 | #include <linux/time.h> |
| 37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
| 38 | 38 | ||
| 39 | /* Global control variables for rcupdate callback mechanism. */ | 39 | /* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ |
| 40 | struct rcu_ctrlblk { | 40 | static struct task_struct *rcu_kthread_task; |
| 41 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | 41 | static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); |
| 42 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | 42 | static unsigned long have_rcu_kthread_work; |
| 43 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | 43 | static void invoke_rcu_kthread(void); |
| 44 | }; | ||
| 45 | |||
| 46 | /* Definition for rcupdate control block. */ | ||
| 47 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | ||
| 48 | .donetail = &rcu_sched_ctrlblk.rcucblist, | ||
| 49 | .curtail = &rcu_sched_ctrlblk.rcucblist, | ||
| 50 | }; | ||
| 51 | |||
| 52 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | ||
| 53 | .donetail = &rcu_bh_ctrlblk.rcucblist, | ||
| 54 | .curtail = &rcu_bh_ctrlblk.rcucblist, | ||
| 55 | }; | ||
| 56 | |||
| 57 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 58 | int rcu_scheduler_active __read_mostly; | ||
| 59 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
| 60 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 61 | 44 | ||
| 62 | /* Forward declarations for rcutiny_plugin.h. */ | 45 | /* Forward declarations for rcutiny_plugin.h. */ |
| 63 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); | 46 | struct rcu_ctrlblk; |
| 47 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); | ||
| 48 | static int rcu_kthread(void *arg); | ||
| 64 | static void __call_rcu(struct rcu_head *head, | 49 | static void __call_rcu(struct rcu_head *head, |
| 65 | void (*func)(struct rcu_head *rcu), | 50 | void (*func)(struct rcu_head *rcu), |
| 66 | struct rcu_ctrlblk *rcp); | 51 | struct rcu_ctrlblk *rcp); |
| @@ -123,7 +108,7 @@ void rcu_sched_qs(int cpu) | |||
| 123 | { | 108 | { |
| 124 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + | 109 | if (rcu_qsctr_help(&rcu_sched_ctrlblk) + |
| 125 | rcu_qsctr_help(&rcu_bh_ctrlblk)) | 110 | rcu_qsctr_help(&rcu_bh_ctrlblk)) |
| 126 | raise_softirq(RCU_SOFTIRQ); | 111 | invoke_rcu_kthread(); |
| 127 | } | 112 | } |
| 128 | 113 | ||
| 129 | /* | 114 | /* |
| @@ -132,7 +117,7 @@ void rcu_sched_qs(int cpu) | |||
| 132 | void rcu_bh_qs(int cpu) | 117 | void rcu_bh_qs(int cpu) |
| 133 | { | 118 | { |
| 134 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) | 119 | if (rcu_qsctr_help(&rcu_bh_ctrlblk)) |
| 135 | raise_softirq(RCU_SOFTIRQ); | 120 | invoke_rcu_kthread(); |
| 136 | } | 121 | } |
| 137 | 122 | ||
| 138 | /* | 123 | /* |
| @@ -152,13 +137,14 @@ void rcu_check_callbacks(int cpu, int user) | |||
| 152 | } | 137 | } |
| 153 | 138 | ||
| 154 | /* | 139 | /* |
| 155 | * Helper function for rcu_process_callbacks() that operates on the | 140 | * Invoke the RCU callbacks on the specified rcu_ctrlblk structure |
| 156 | * specified rcu_ctrlblk structure. | 141 | * whose grace period has elapsed. |
| 157 | */ | 142 | */ |
| 158 | static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | 143 | static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) |
| 159 | { | 144 | { |
| 160 | struct rcu_head *next, *list; | 145 | struct rcu_head *next, *list; |
| 161 | unsigned long flags; | 146 | unsigned long flags; |
| 147 | RCU_TRACE(int cb_count = 0); | ||
| 162 | 148 | ||
| 163 | /* If no RCU callbacks ready to invoke, just return. */ | 149 | /* If no RCU callbacks ready to invoke, just return. */ |
| 164 | if (&rcp->rcucblist == rcp->donetail) | 150 | if (&rcp->rcucblist == rcp->donetail) |
| @@ -180,19 +166,58 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 180 | next = list->next; | 166 | next = list->next; |
| 181 | prefetch(next); | 167 | prefetch(next); |
| 182 | debug_rcu_head_unqueue(list); | 168 | debug_rcu_head_unqueue(list); |
| 169 | local_bh_disable(); | ||
| 183 | list->func(list); | 170 | list->func(list); |
| 171 | local_bh_enable(); | ||
| 184 | list = next; | 172 | list = next; |
| 173 | RCU_TRACE(cb_count++); | ||
| 185 | } | 174 | } |
| 175 | RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | ||
| 186 | } | 176 | } |
| 187 | 177 | ||
| 188 | /* | 178 | /* |
| 189 | * Invoke any callbacks whose grace period has completed. | 179 | * This kthread invokes RCU callbacks whose grace periods have |
| 180 | * elapsed. It is awakened as needed, and takes the place of the | ||
| 181 | * RCU_SOFTIRQ that was used previously for this purpose. | ||
| 182 | * This is a kthread, but it is never stopped, at least not until | ||
| 183 | * the system goes down. | ||
| 190 | */ | 184 | */ |
| 191 | static void rcu_process_callbacks(struct softirq_action *unused) | 185 | static int rcu_kthread(void *arg) |
| 192 | { | 186 | { |
| 193 | __rcu_process_callbacks(&rcu_sched_ctrlblk); | 187 | unsigned long work; |
| 194 | __rcu_process_callbacks(&rcu_bh_ctrlblk); | 188 | unsigned long morework; |
| 195 | rcu_preempt_process_callbacks(); | 189 | unsigned long flags; |
| 190 | |||
| 191 | for (;;) { | ||
| 192 | wait_event(rcu_kthread_wq, have_rcu_kthread_work != 0); | ||
| 193 | morework = rcu_boost(); | ||
| 194 | local_irq_save(flags); | ||
| 195 | work = have_rcu_kthread_work; | ||
| 196 | have_rcu_kthread_work = morework; | ||
| 197 | local_irq_restore(flags); | ||
| 198 | if (work) { | ||
| 199 | rcu_process_callbacks(&rcu_sched_ctrlblk); | ||
| 200 | rcu_process_callbacks(&rcu_bh_ctrlblk); | ||
| 201 | rcu_preempt_process_callbacks(); | ||
| 202 | } | ||
| 203 | schedule_timeout_interruptible(1); /* Leave CPU for others. */ | ||
| 204 | } | ||
| 205 | |||
| 206 | return 0; /* Not reached, but needed to shut gcc up. */ | ||
| 207 | } | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Wake up rcu_kthread() to process callbacks now eligible for invocation | ||
| 211 | * or to boost readers. | ||
| 212 | */ | ||
| 213 | static void invoke_rcu_kthread(void) | ||
| 214 | { | ||
| 215 | unsigned long flags; | ||
| 216 | |||
| 217 | local_irq_save(flags); | ||
| 218 | have_rcu_kthread_work = 1; | ||
| 219 | wake_up(&rcu_kthread_wq); | ||
| 220 | local_irq_restore(flags); | ||
| 196 | } | 221 | } |
| 197 | 222 | ||
| 198 | /* | 223 | /* |
| @@ -230,6 +255,7 @@ static void __call_rcu(struct rcu_head *head, | |||
| 230 | local_irq_save(flags); | 255 | local_irq_save(flags); |
| 231 | *rcp->curtail = head; | 256 | *rcp->curtail = head; |
| 232 | rcp->curtail = &head->next; | 257 | rcp->curtail = &head->next; |
| 258 | RCU_TRACE(rcp->qlen++); | ||
| 233 | local_irq_restore(flags); | 259 | local_irq_restore(flags); |
| 234 | } | 260 | } |
| 235 | 261 | ||
| @@ -282,7 +308,16 @@ void rcu_barrier_sched(void) | |||
| 282 | } | 308 | } |
| 283 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); | 309 | EXPORT_SYMBOL_GPL(rcu_barrier_sched); |
| 284 | 310 | ||
| 285 | void __init rcu_init(void) | 311 | /* |
| 312 | * Spawn the kthread that invokes RCU callbacks. | ||
| 313 | */ | ||
| 314 | static int __init rcu_spawn_kthreads(void) | ||
| 286 | { | 315 | { |
| 287 | open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | 316 | struct sched_param sp; |
| 317 | |||
| 318 | rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread"); | ||
| 319 | sp.sched_priority = RCU_BOOST_PRIO; | ||
| 320 | sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp); | ||
| 321 | return 0; | ||
| 288 | } | 322 | } |
| 323 | early_initcall(rcu_spawn_kthreads); | ||
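The core of the rcutiny.c change is visible above: RCU_SOFTIRQ processing is replaced by a dedicated kthread that is flagged for work with interrupts disabled and woken through a wait queue. A minimal standalone sketch of that defer-to-kthread pattern follows; the demo_* names are hypothetical stand-ins, and only the structure mirrors the patch.

```c
/*
 * Sketch of the softirq-to-kthread conversion: a work flag set with
 * interrupts disabled, a wait queue, and a kthread that drains the work.
 * All demo_* identifiers are illustrative, not from the patch.
 */
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_wq);
static unsigned long demo_have_work;

static void demo_process_work(void)
{
	/* Invoke whatever callbacks are ready here. */
}

/* Callable from any context, including irq, to request processing. */
static void demo_invoke_kthread(void)
{
	unsigned long flags;

	local_irq_save(flags);	/* flag update + wakeup atomic vs. irqs */
	demo_have_work = 1;
	wake_up(&demo_wq);
	local_irq_restore(flags);
}

static int demo_kthread(void *arg)
{
	unsigned long work;
	unsigned long flags;

	for (;;) {
		wait_event(demo_wq, demo_have_work != 0);
		local_irq_save(flags);	/* consume the flag atomically */
		work = demo_have_work;
		demo_have_work = 0;
		local_irq_restore(flags);
		if (work)
			demo_process_work();
		schedule_timeout_interruptible(1); /* leave CPU for others */
	}
	return 0;	/* not reached */
}

static int __init demo_spawn(void)
{
	/* As the patch's rcu_spawn_kthreads() does for rcu_kthread. */
	kthread_run(demo_kthread, NULL, "demo_kthread");
	return 0;
}
early_initcall(demo_spawn);
```

Moving callback invocation into a schedulable task is what makes RCU priority boosting possible at all: unlike a softirq, a SCHED_FIFO kthread can be given, and can hand out, a real-time priority.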
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 6ceca4f745ff..015abaea962a 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
| @@ -22,6 +22,40 @@ | |||
| 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 22 | * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| 23 | */ | 23 | */ |
| 24 | 24 | ||
| 25 | #include <linux/kthread.h> | ||
| 26 | #include <linux/debugfs.h> | ||
| 27 | #include <linux/seq_file.h> | ||
| 28 | |||
| 29 | #ifdef CONFIG_RCU_TRACE | ||
| 30 | #define RCU_TRACE(stmt) stmt | ||
| 31 | #else /* #ifdef CONFIG_RCU_TRACE */ | ||
| 32 | #define RCU_TRACE(stmt) | ||
| 33 | #endif /* #else #ifdef CONFIG_RCU_TRACE */ | ||
| 34 | |||
| 35 | /* Global control variables for rcupdate callback mechanism. */ | ||
| 36 | struct rcu_ctrlblk { | ||
| 37 | struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ | ||
| 38 | struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ | ||
| 39 | struct rcu_head **curtail; /* ->next pointer of last CB. */ | ||
| 40 | RCU_TRACE(long qlen); /* Number of pending CBs. */ | ||
| 41 | }; | ||
| 42 | |||
| 43 | /* Definition for rcupdate control block. */ | ||
| 44 | static struct rcu_ctrlblk rcu_sched_ctrlblk = { | ||
| 45 | .donetail = &rcu_sched_ctrlblk.rcucblist, | ||
| 46 | .curtail = &rcu_sched_ctrlblk.rcucblist, | ||
| 47 | }; | ||
| 48 | |||
| 49 | static struct rcu_ctrlblk rcu_bh_ctrlblk = { | ||
| 50 | .donetail = &rcu_bh_ctrlblk.rcucblist, | ||
| 51 | .curtail = &rcu_bh_ctrlblk.rcucblist, | ||
| 52 | }; | ||
| 53 | |||
| 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
| 55 | int rcu_scheduler_active __read_mostly; | ||
| 56 | EXPORT_SYMBOL_GPL(rcu_scheduler_active); | ||
| 57 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||
| 58 | |||
| 25 | #ifdef CONFIG_TINY_PREEMPT_RCU | 59 | #ifdef CONFIG_TINY_PREEMPT_RCU |
| 26 | 60 | ||
| 27 | #include <linux/delay.h> | 61 | #include <linux/delay.h> |
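The RCU_TRACE() macro introduced above is a conditional-compilation idiom: its statement argument is emitted verbatim when CONFIG_RCU_TRACE=y and disappears entirely otherwise, so tracing fields and counter updates cost nothing in untraced builds. A hedged illustration with a made-up structure:

```c
#include <linux/types.h>

#ifdef CONFIG_RCU_TRACE
#define RCU_TRACE(stmt) stmt
#else /* #ifdef CONFIG_RCU_TRACE */
#define RCU_TRACE(stmt)
#endif /* #else #ifdef CONFIG_RCU_TRACE */

/* Hypothetical control block; only ->qlen depends on tracing. */
struct demo_ctrlblk {
	struct rcu_head *list;
	RCU_TRACE(long qlen);	/* field compiled out when tracing is off */
};

static void demo_enqueue(struct demo_ctrlblk *dcb, struct rcu_head *head)
{
	head->next = dcb->list;
	dcb->list = head;
	RCU_TRACE(dcb->qlen++);	/* whole statement vanishes when off */
}
```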
| @@ -46,17 +80,45 @@ struct rcu_preempt_ctrlblk { | |||
| 46 | struct list_head *gp_tasks; | 80 | struct list_head *gp_tasks; |
| 47 | /* Pointer to the first task blocking the */ | 81 | /* Pointer to the first task blocking the */ |
| 48 | /* current grace period, or NULL if there */ | 82 | /* current grace period, or NULL if there */ |
| 49 | /* is not such task. */ | 83 | /* is no such task. */ |
| 50 | struct list_head *exp_tasks; | 84 | struct list_head *exp_tasks; |
| 51 | /* Pointer to first task blocking the */ | 85 | /* Pointer to first task blocking the */ |
| 52 | /* current expedited grace period, or NULL */ | 86 | /* current expedited grace period, or NULL */ |
| 53 | /* if there is no such task. If there */ | 87 | /* if there is no such task. If there */ |
| 54 | /* is no current expedited grace period, */ | 88 | /* is no current expedited grace period, */ |
| 55 | /* then there cannot be any such task. */ | 89 | /* then there cannot be any such task. */ |
| 90 | #ifdef CONFIG_RCU_BOOST | ||
| 91 | struct list_head *boost_tasks; | ||
| 92 | /* Pointer to first task that needs to be */ | ||
| 93 | /* priority-boosted, or NULL if no priority */ | ||
| 94 | /* boosting is needed. If there is no */ | ||
| 95 | /* current or expedited grace period, there */ | ||
| 96 | /* can be no such task. */ | ||
| 97 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 56 | u8 gpnum; /* Current grace period. */ | 98 | u8 gpnum; /* Current grace period. */ |
| 57 | u8 gpcpu; /* Last grace period blocked by the CPU. */ | 99 | u8 gpcpu; /* Last grace period blocked by the CPU. */ |
| 58 | u8 completed; /* Last grace period completed. */ | 100 | u8 completed; /* Last grace period completed. */ |
| 59 | /* If all three are equal, RCU is idle. */ | 101 | /* If all three are equal, RCU is idle. */ |
| 102 | #ifdef CONFIG_RCU_BOOST | ||
| 103 | s8 boosted_this_gp; /* Has boosting already happened? */ | ||
| 104 | unsigned long boost_time; /* When to start boosting (jiffies) */ | ||
| 105 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 106 | #ifdef CONFIG_RCU_TRACE | ||
| 107 | unsigned long n_grace_periods; | ||
| 108 | #ifdef CONFIG_RCU_BOOST | ||
| 109 | unsigned long n_tasks_boosted; | ||
| 110 | unsigned long n_exp_boosts; | ||
| 111 | unsigned long n_normal_boosts; | ||
| 112 | unsigned long n_normal_balk_blkd_tasks; | ||
| 113 | unsigned long n_normal_balk_gp_tasks; | ||
| 114 | unsigned long n_normal_balk_boost_tasks; | ||
| 115 | unsigned long n_normal_balk_boosted; | ||
| 116 | unsigned long n_normal_balk_notyet; | ||
| 117 | unsigned long n_normal_balk_nos; | ||
| 118 | unsigned long n_exp_balk_blkd_tasks; | ||
| 119 | unsigned long n_exp_balk_nos; | ||
| 120 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 121 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
| 60 | }; | 122 | }; |
| 61 | 123 | ||
| 62 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { | 124 | static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { |
| @@ -122,6 +184,210 @@ static int rcu_preempt_gp_in_progress(void) | |||
| 122 | } | 184 | } |
| 123 | 185 | ||
| 124 | /* | 186 | /* |
| 187 | * Advance a ->blkd_tasks-list pointer to the next entry, returning | ||
| 188 | * NULL instead if at the end of the list. | ||
| 189 | */ | ||
| 190 | static struct list_head *rcu_next_node_entry(struct task_struct *t) | ||
| 191 | { | ||
| 192 | struct list_head *np; | ||
| 193 | |||
| 194 | np = t->rcu_node_entry.next; | ||
| 195 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
| 196 | np = NULL; | ||
| 197 | return np; | ||
| 198 | } | ||
| 199 | |||
| 200 | #ifdef CONFIG_RCU_TRACE | ||
| 201 | |||
| 202 | #ifdef CONFIG_RCU_BOOST | ||
| 203 | static void rcu_initiate_boost_trace(void); | ||
| 204 | static void rcu_initiate_exp_boost_trace(void); | ||
| 205 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 206 | |||
| 207 | /* | ||
| 208 | * Dump additional statistics for TINY_PREEMPT_RCU. | ||
| 209 | */ | ||
| 210 | static void show_tiny_preempt_stats(struct seq_file *m) | ||
| 211 | { | ||
| 212 | seq_printf(m, "rcu_preempt: qlen=%ld gp=%lu g%u/p%u/c%u tasks=%c%c%c\n", | ||
| 213 | rcu_preempt_ctrlblk.rcb.qlen, | ||
| 214 | rcu_preempt_ctrlblk.n_grace_periods, | ||
| 215 | rcu_preempt_ctrlblk.gpnum, | ||
| 216 | rcu_preempt_ctrlblk.gpcpu, | ||
| 217 | rcu_preempt_ctrlblk.completed, | ||
| 218 | "T."[list_empty(&rcu_preempt_ctrlblk.blkd_tasks)], | ||
| 219 | "N."[!rcu_preempt_ctrlblk.gp_tasks], | ||
| 220 | "E."[!rcu_preempt_ctrlblk.exp_tasks]); | ||
| 221 | #ifdef CONFIG_RCU_BOOST | ||
| 222 | seq_printf(m, " ttb=%c btg=", | ||
| 223 | "B."[!rcu_preempt_ctrlblk.boost_tasks]); | ||
| 224 | switch (rcu_preempt_ctrlblk.boosted_this_gp) { | ||
| 225 | case -1: | ||
| 226 | seq_puts(m, "exp"); | ||
| 227 | break; | ||
| 228 | case 0: | ||
| 229 | seq_puts(m, "no"); | ||
| 230 | break; | ||
| 231 | case 1: | ||
| 232 | seq_puts(m, "begun"); | ||
| 233 | break; | ||
| 234 | case 2: | ||
| 235 | seq_puts(m, "done"); | ||
| 236 | break; | ||
| 237 | default: | ||
| 238 | seq_printf(m, "?%d?", rcu_preempt_ctrlblk.boosted_this_gp); | ||
| 239 | } | ||
| 240 | seq_printf(m, " ntb=%lu neb=%lu nnb=%lu j=%04x bt=%04x\n", | ||
| 241 | rcu_preempt_ctrlblk.n_tasks_boosted, | ||
| 242 | rcu_preempt_ctrlblk.n_exp_boosts, | ||
| 243 | rcu_preempt_ctrlblk.n_normal_boosts, | ||
| 244 | (int)(jiffies & 0xffff), | ||
| 245 | (int)(rcu_preempt_ctrlblk.boost_time & 0xffff)); | ||
| 246 | seq_printf(m, " %s: nt=%lu gt=%lu bt=%lu b=%lu ny=%lu nos=%lu\n", | ||
| 247 | "normal balk", | ||
| 248 | rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks, | ||
| 249 | rcu_preempt_ctrlblk.n_normal_balk_gp_tasks, | ||
| 250 | rcu_preempt_ctrlblk.n_normal_balk_boost_tasks, | ||
| 251 | rcu_preempt_ctrlblk.n_normal_balk_boosted, | ||
| 252 | rcu_preempt_ctrlblk.n_normal_balk_notyet, | ||
| 253 | rcu_preempt_ctrlblk.n_normal_balk_nos); | ||
| 254 | seq_printf(m, " exp balk: bt=%lu nos=%lu\n", | ||
| 255 | rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks, | ||
| 256 | rcu_preempt_ctrlblk.n_exp_balk_nos); | ||
| 257 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 258 | } | ||
| 259 | |||
| 260 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
| 261 | |||
| 262 | #ifdef CONFIG_RCU_BOOST | ||
| 263 | |||
| 264 | #include "rtmutex_common.h" | ||
| 265 | |||
| 266 | /* | ||
| 267 | * Carry out RCU priority boosting on the task indicated by ->boost_tasks, | ||
| 268 | * and advance ->boost_tasks to the next task in the ->blkd_tasks list. | ||
| 269 | */ | ||
| 270 | static int rcu_boost(void) | ||
| 271 | { | ||
| 272 | unsigned long flags; | ||
| 273 | struct rt_mutex mtx; | ||
| 274 | struct list_head *np; | ||
| 275 | struct task_struct *t; | ||
| 276 | |||
| 277 | if (rcu_preempt_ctrlblk.boost_tasks == NULL) | ||
| 278 | return 0; /* Nothing to boost. */ | ||
| 279 | raw_local_irq_save(flags); | ||
| 280 | rcu_preempt_ctrlblk.boosted_this_gp++; | ||
| 281 | t = container_of(rcu_preempt_ctrlblk.boost_tasks, struct task_struct, | ||
| 282 | rcu_node_entry); | ||
| 283 | np = rcu_next_node_entry(t); | ||
| 284 | rt_mutex_init_proxy_locked(&mtx, t); | ||
| 285 | t->rcu_boost_mutex = &mtx; | ||
| 286 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BOOSTED; | ||
| 287 | raw_local_irq_restore(flags); | ||
| 288 | rt_mutex_lock(&mtx); | ||
| 289 | RCU_TRACE(rcu_preempt_ctrlblk.n_tasks_boosted++); | ||
| 290 | rcu_preempt_ctrlblk.boosted_this_gp++; | ||
| 291 | rt_mutex_unlock(&mtx); | ||
| 292 | return rcu_preempt_ctrlblk.boost_tasks != NULL; | ||
| 293 | } | ||
| 294 | |||
| 295 | /* | ||
| 296 | * Check to see if it is now time to start boosting RCU readers blocking | ||
| 297 | * the current grace period, and, if so, tell the rcu_kthread_task to | ||
| 298 | * start boosting them. If there is an expedited boost in progress, | ||
| 299 | * we wait for it to complete. | ||
| 300 | * | ||
| 301 | * If there are no blocked readers blocking the current grace period, | ||
| 302 | * return 0 to let the caller know, otherwise return 1. Note that this | ||
| 303 | * return value is independent of whether or not boosting was done. | ||
| 304 | */ | ||
| 305 | static int rcu_initiate_boost(void) | ||
| 306 | { | ||
| 307 | if (!rcu_preempt_blocked_readers_cgp()) { | ||
| 308 | RCU_TRACE(rcu_preempt_ctrlblk.n_normal_balk_blkd_tasks++); | ||
| 309 | return 0; | ||
| 310 | } | ||
| 311 | if (rcu_preempt_ctrlblk.gp_tasks != NULL && | ||
| 312 | rcu_preempt_ctrlblk.boost_tasks == NULL && | ||
| 313 | rcu_preempt_ctrlblk.boosted_this_gp == 0 && | ||
| 314 | ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) { | ||
| 315 | rcu_preempt_ctrlblk.boost_tasks = rcu_preempt_ctrlblk.gp_tasks; | ||
| 316 | invoke_rcu_kthread(); | ||
| 317 | RCU_TRACE(rcu_preempt_ctrlblk.n_normal_boosts++); | ||
| 318 | } else | ||
| 319 | RCU_TRACE(rcu_initiate_boost_trace()); | ||
| 320 | return 1; | ||
| 321 | } | ||
| 322 | |||
| 323 | /* | ||
| 324 | * Initiate boosting for an expedited grace period. | ||
| 325 | */ | ||
| 326 | static void rcu_initiate_expedited_boost(void) | ||
| 327 | { | ||
| 328 | unsigned long flags; | ||
| 329 | |||
| 330 | raw_local_irq_save(flags); | ||
| 331 | if (!list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) { | ||
| 332 | rcu_preempt_ctrlblk.boost_tasks = | ||
| 333 | rcu_preempt_ctrlblk.blkd_tasks.next; | ||
| 334 | rcu_preempt_ctrlblk.boosted_this_gp = -1; | ||
| 335 | invoke_rcu_kthread(); | ||
| 336 | RCU_TRACE(rcu_preempt_ctrlblk.n_exp_boosts++); | ||
| 337 | } else | ||
| 338 | RCU_TRACE(rcu_initiate_exp_boost_trace()); | ||
| 339 | raw_local_irq_restore(flags); | ||
| 340 | } | ||
| 341 | |||
| 342 | #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Do priority-boost accounting for the start of a new grace period. | ||
| 346 | */ | ||
| 347 | static void rcu_preempt_boost_start_gp(void) | ||
| 348 | { | ||
| 349 | rcu_preempt_ctrlblk.boost_time = jiffies + RCU_BOOST_DELAY_JIFFIES; | ||
| 350 | if (rcu_preempt_ctrlblk.boosted_this_gp > 0) | ||
| 351 | rcu_preempt_ctrlblk.boosted_this_gp = 0; | ||
| 352 | } | ||
| 353 | |||
| 354 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 355 | |||
| 356 | /* | ||
| 357 | * If there is no RCU priority boosting, we don't boost. | ||
| 358 | */ | ||
| 359 | static int rcu_boost(void) | ||
| 360 | { | ||
| 361 | return 0; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * If there is no RCU priority boosting, we don't initiate boosting, | ||
| 366 | * but we do indicate whether there are blocked readers blocking the | ||
| 367 | * current grace period. | ||
| 368 | */ | ||
| 369 | static int rcu_initiate_boost(void) | ||
| 370 | { | ||
| 371 | return rcu_preempt_blocked_readers_cgp(); | ||
| 372 | } | ||
| 373 | |||
| 374 | /* | ||
| 375 | * If there is no RCU priority boosting, we don't initiate expedited boosting. | ||
| 376 | */ | ||
| 377 | static void rcu_initiate_expedited_boost(void) | ||
| 378 | { | ||
| 379 | } | ||
| 380 | |||
| 381 | /* | ||
| 382 | * If there is no RCU priority boosting, nothing to do at grace-period start. | ||
| 383 | */ | ||
| 384 | static void rcu_preempt_boost_start_gp(void) | ||
| 385 | { | ||
| 386 | } | ||
| 387 | |||
| 388 | #endif /* else #ifdef CONFIG_RCU_BOOST */ | ||
| 389 | |||
| 390 | /* | ||
| 125 | * Record a preemptible-RCU quiescent state for the specified CPU. Note | 391 | * Record a preemptible-RCU quiescent state for the specified CPU. Note |
| 126 | * that this just means that the task currently running on the CPU is | 392 | * that this just means that the task currently running on the CPU is |
| 127 | * in a quiescent state. There might be any number of tasks blocked | 393 | * in a quiescent state. There might be any number of tasks blocked |
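rcu_initiate_boost() above gates the start of boosting on ULONG_CMP_GE(jiffies, boost_time). That helper, defined in rcupdate.h of this era as ULONG_MAX / 2 >= (a) - (b), compares timestamps by modular subtraction so the test stays correct when the jiffies counter wraps. A small userspace sketch of why, with illustrative values:

```c
#include <limits.h>
#include <stdio.h>

/* Per rcupdate.h of this era: "a >= b" modulo 2^BITS_PER_LONG. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

int main(void)
{
	unsigned long boost_time = ULONG_MAX - 5; /* deadline near the wrap */
	unsigned long now = 10;			  /* jiffies after wrapping */

	/*
	 * A direct ">=" is fooled by the wrap; subtraction is not:
	 * now - boost_time == 16 (mod 2^BITS_PER_LONG), well under half
	 * the counter's range, so "now" is correctly treated as later.
	 */
	printf("naive:   %d\n", now >= boost_time);		/* 0: wrong */
	printf("modular: %d\n", ULONG_CMP_GE(now, boost_time));	/* 1: right */
	return 0;
}
```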
| @@ -148,11 +414,14 @@ static void rcu_preempt_cpu_qs(void) | |||
| 148 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; | 414 | rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; |
| 149 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; | 415 | current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; |
| 150 | 416 | ||
| 417 | /* If there is no GP then there is nothing more to do. */ | ||
| 418 | if (!rcu_preempt_gp_in_progress()) | ||
| 419 | return; | ||
| 151 | /* | 420 | /* |
| 152 | * If there is no GP, or if blocked readers are still blocking GP, | 421 | * Check up on boosting. If there are no readers blocking the |
| 153 | * then there is nothing more to do. | 422 | * current grace period, leave. |
| 154 | */ | 423 | */ |
| 155 | if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) | 424 | if (rcu_initiate_boost()) |
| 156 | return; | 425 | return; |
| 157 | 426 | ||
| 158 | /* Advance callbacks. */ | 427 | /* Advance callbacks. */ |
| @@ -164,9 +433,9 @@ static void rcu_preempt_cpu_qs(void) | |||
| 164 | if (!rcu_preempt_blocked_readers_any()) | 433 | if (!rcu_preempt_blocked_readers_any()) |
| 165 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; | 434 | rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; |
| 166 | 435 | ||
| 167 | /* If there are done callbacks, make RCU_SOFTIRQ process them. */ | 436 | /* If there are done callbacks, cause them to be invoked. */ |
| 168 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) | 437 | if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) |
| 169 | raise_softirq(RCU_SOFTIRQ); | 438 | invoke_rcu_kthread(); |
| 170 | } | 439 | } |
| 171 | 440 | ||
| 172 | /* | 441 | /* |
| @@ -178,12 +447,16 @@ static void rcu_preempt_start_gp(void) | |||
| 178 | 447 | ||
| 179 | /* Official start of GP. */ | 448 | /* Official start of GP. */ |
| 180 | rcu_preempt_ctrlblk.gpnum++; | 449 | rcu_preempt_ctrlblk.gpnum++; |
| 450 | RCU_TRACE(rcu_preempt_ctrlblk.n_grace_periods++); | ||
| 181 | 451 | ||
| 182 | /* Any blocked RCU readers block new GP. */ | 452 | /* Any blocked RCU readers block new GP. */ |
| 183 | if (rcu_preempt_blocked_readers_any()) | 453 | if (rcu_preempt_blocked_readers_any()) |
| 184 | rcu_preempt_ctrlblk.gp_tasks = | 454 | rcu_preempt_ctrlblk.gp_tasks = |
| 185 | rcu_preempt_ctrlblk.blkd_tasks.next; | 455 | rcu_preempt_ctrlblk.blkd_tasks.next; |
| 186 | 456 | ||
| 457 | /* Set up for RCU priority boosting. */ | ||
| 458 | rcu_preempt_boost_start_gp(); | ||
| 459 | |||
| 187 | /* If there is no running reader, CPU is done with GP. */ | 460 | /* If there is no running reader, CPU is done with GP. */ |
| 188 | if (!rcu_preempt_running_reader()) | 461 | if (!rcu_preempt_running_reader()) |
| 189 | rcu_preempt_cpu_qs(); | 462 | rcu_preempt_cpu_qs(); |
| @@ -304,14 +577,16 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 304 | */ | 577 | */ |
| 305 | empty = !rcu_preempt_blocked_readers_cgp(); | 578 | empty = !rcu_preempt_blocked_readers_cgp(); |
| 306 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; | 579 | empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; |
| 307 | np = t->rcu_node_entry.next; | 580 | np = rcu_next_node_entry(t); |
| 308 | if (np == &rcu_preempt_ctrlblk.blkd_tasks) | ||
| 309 | np = NULL; | ||
| 310 | list_del(&t->rcu_node_entry); | 581 | list_del(&t->rcu_node_entry); |
| 311 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) | 582 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) |
| 312 | rcu_preempt_ctrlblk.gp_tasks = np; | 583 | rcu_preempt_ctrlblk.gp_tasks = np; |
| 313 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) | 584 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) |
| 314 | rcu_preempt_ctrlblk.exp_tasks = np; | 585 | rcu_preempt_ctrlblk.exp_tasks = np; |
| 586 | #ifdef CONFIG_RCU_BOOST | ||
| 587 | if (&t->rcu_node_entry == rcu_preempt_ctrlblk.boost_tasks) | ||
| 588 | rcu_preempt_ctrlblk.boost_tasks = np; | ||
| 589 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 315 | INIT_LIST_HEAD(&t->rcu_node_entry); | 590 | INIT_LIST_HEAD(&t->rcu_node_entry); |
| 316 | 591 | ||
| 317 | /* | 592 | /* |
| @@ -331,6 +606,14 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 331 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) | 606 | if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) |
| 332 | rcu_report_exp_done(); | 607 | rcu_report_exp_done(); |
| 333 | } | 608 | } |
| 609 | #ifdef CONFIG_RCU_BOOST | ||
| 610 | /* Unboost self if was boosted. */ | ||
| 611 | if (special & RCU_READ_UNLOCK_BOOSTED) { | ||
| 612 | t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BOOSTED; | ||
| 613 | rt_mutex_unlock(t->rcu_boost_mutex); | ||
| 614 | t->rcu_boost_mutex = NULL; | ||
| 615 | } | ||
| 616 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 334 | local_irq_restore(flags); | 617 | local_irq_restore(flags); |
| 335 | } | 618 | } |
| 336 | 619 | ||
| @@ -374,7 +657,7 @@ static void rcu_preempt_check_callbacks(void) | |||
| 374 | rcu_preempt_cpu_qs(); | 657 | rcu_preempt_cpu_qs(); |
| 375 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != | 658 | if (&rcu_preempt_ctrlblk.rcb.rcucblist != |
| 376 | rcu_preempt_ctrlblk.rcb.donetail) | 659 | rcu_preempt_ctrlblk.rcb.donetail) |
| 377 | raise_softirq(RCU_SOFTIRQ); | 660 | invoke_rcu_kthread(); |
| 378 | if (rcu_preempt_gp_in_progress() && | 661 | if (rcu_preempt_gp_in_progress() && |
| 379 | rcu_cpu_blocking_cur_gp() && | 662 | rcu_cpu_blocking_cur_gp() && |
| 380 | rcu_preempt_running_reader()) | 663 | rcu_preempt_running_reader()) |
| @@ -383,7 +666,7 @@ static void rcu_preempt_check_callbacks(void) | |||
| 383 | 666 | ||
| 384 | /* | 667 | /* |
| 385 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to | 668 | * TINY_PREEMPT_RCU has an extra callback-list tail pointer to |
| 386 | * update, so this is invoked from __rcu_process_callbacks() to | 669 | * update, so this is invoked from rcu_process_callbacks() to |
| 387 | * handle that case. Of course, it is invoked for all flavors of | 670 | * handle that case. Of course, it is invoked for all flavors of |
| 388 | * RCU, but RCU callbacks can appear only on one of the lists, and | 671 | * RCU, but RCU callbacks can appear only on one of the lists, and |
| 389 | * neither ->nexttail nor ->donetail can possibly be NULL, so there | 672 | * neither ->nexttail nor ->donetail can possibly be NULL, so there |
| @@ -400,7 +683,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) | |||
| 400 | */ | 683 | */ |
| 401 | static void rcu_preempt_process_callbacks(void) | 684 | static void rcu_preempt_process_callbacks(void) |
| 402 | { | 685 | { |
| 403 | __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); | 686 | rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); |
| 404 | } | 687 | } |
| 405 | 688 | ||
| 406 | /* | 689 | /* |
| @@ -417,6 +700,7 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 417 | local_irq_save(flags); | 700 | local_irq_save(flags); |
| 418 | *rcu_preempt_ctrlblk.nexttail = head; | 701 | *rcu_preempt_ctrlblk.nexttail = head; |
| 419 | rcu_preempt_ctrlblk.nexttail = &head->next; | 702 | rcu_preempt_ctrlblk.nexttail = &head->next; |
| 703 | RCU_TRACE(rcu_preempt_ctrlblk.rcb.qlen++); | ||
| 420 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ | 704 | rcu_preempt_start_gp(); /* checks to see if GP needed. */ |
| 421 | local_irq_restore(flags); | 705 | local_irq_restore(flags); |
| 422 | } | 706 | } |
| @@ -532,6 +816,7 @@ void synchronize_rcu_expedited(void) | |||
| 532 | 816 | ||
| 533 | /* Wait for tail of ->blkd_tasks list to drain. */ | 817 | /* Wait for tail of ->blkd_tasks list to drain. */ |
| 534 | if (rcu_preempted_readers_exp()) | 818 | if (rcu_preempted_readers_exp()) |
| 819 | rcu_initiate_expedited_boost(); | ||
| 535 | wait_event(sync_rcu_preempt_exp_wq, | 820 | wait_event(sync_rcu_preempt_exp_wq, |
| 536 | !rcu_preempted_readers_exp()); | 821 | !rcu_preempted_readers_exp()); |
| 537 | 822 | ||
| @@ -572,6 +857,27 @@ void exit_rcu(void) | |||
| 572 | 857 | ||
| 573 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
| 574 | 859 | ||
| 860 | #ifdef CONFIG_RCU_TRACE | ||
| 861 | |||
| 862 | /* | ||
| 863 | * Because preemptible RCU does not exist, it is not necessary to | ||
| 864 | * dump out its statistics. | ||
| 865 | */ | ||
| 866 | static void show_tiny_preempt_stats(struct seq_file *m) | ||
| 867 | { | ||
| 868 | } | ||
| 869 | |||
| 870 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
| 871 | |||
| 872 | /* | ||
| 873 | * Because preemptible RCU does not exist, it is never necessary to | ||
| 874 | * boost preempted RCU readers. | ||
| 875 | */ | ||
| 876 | static int rcu_boost(void) | ||
| 877 | { | ||
| 878 | return 0; | ||
| 879 | } | ||
| 880 | |||
| 575 | /* | 881 | /* |
| 576 | * Because preemptible RCU does not exist, it never has any callbacks | 882 | * Because preemptible RCU does not exist, it never has any callbacks |
| 577 | * to check. | 883 | * to check. |
| @@ -599,17 +905,116 @@ static void rcu_preempt_process_callbacks(void) | |||
| 599 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ | 905 | #endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ |
| 600 | 906 | ||
| 601 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 907 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 602 | |||
| 603 | #include <linux/kernel_stat.h> | 908 | #include <linux/kernel_stat.h> |
| 604 | 909 | ||
| 605 | /* | 910 | /* |
| 606 | * During boot, we forgive RCU lockdep issues. After this function is | 911 | * During boot, we forgive RCU lockdep issues. After this function is |
| 607 | * invoked, we start taking RCU lockdep issues seriously. | 912 | * invoked, we start taking RCU lockdep issues seriously. |
| 608 | */ | 913 | */ |
| 609 | void rcu_scheduler_starting(void) | 914 | void __init rcu_scheduler_starting(void) |
| 610 | { | 915 | { |
| 611 | WARN_ON(nr_context_switches() > 0); | 916 | WARN_ON(nr_context_switches() > 0); |
| 612 | rcu_scheduler_active = 1; | 917 | rcu_scheduler_active = 1; |
| 613 | } | 918 | } |
| 614 | 919 | ||
| 615 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 920 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 921 | |||
| 922 | #ifdef CONFIG_RCU_BOOST | ||
| 923 | #define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO | ||
| 924 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 925 | #define RCU_BOOST_PRIO 1 | ||
| 926 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
| 927 | |||
| 928 | #ifdef CONFIG_RCU_TRACE | ||
| 929 | |||
| 930 | #ifdef CONFIG_RCU_BOOST | ||
| 931 | |||
| 932 | static void rcu_initiate_boost_trace(void) | ||
| 933 | { | ||
| 934 | if (rcu_preempt_ctrlblk.gp_tasks == NULL) | ||
| 935 | rcu_preempt_ctrlblk.n_normal_balk_gp_tasks++; | ||
| 936 | else if (rcu_preempt_ctrlblk.boost_tasks != NULL) | ||
| 937 | rcu_preempt_ctrlblk.n_normal_balk_boost_tasks++; | ||
| 938 | else if (rcu_preempt_ctrlblk.boosted_this_gp != 0) | ||
| 939 | rcu_preempt_ctrlblk.n_normal_balk_boosted++; | ||
| 940 | else if (!ULONG_CMP_GE(jiffies, rcu_preempt_ctrlblk.boost_time)) | ||
| 941 | rcu_preempt_ctrlblk.n_normal_balk_notyet++; | ||
| 942 | else | ||
| 943 | rcu_preempt_ctrlblk.n_normal_balk_nos++; | ||
| 944 | } | ||
| 945 | |||
| 946 | static void rcu_initiate_exp_boost_trace(void) | ||
| 947 | { | ||
| 948 | if (list_empty(&rcu_preempt_ctrlblk.blkd_tasks)) | ||
| 949 | rcu_preempt_ctrlblk.n_exp_balk_blkd_tasks++; | ||
| 950 | else | ||
| 951 | rcu_preempt_ctrlblk.n_exp_balk_nos++; | ||
| 952 | } | ||
| 953 | |||
| 954 | #endif /* #ifdef CONFIG_RCU_BOOST */ | ||
| 955 | |||
| 956 | static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n) | ||
| 957 | { | ||
| 958 | unsigned long flags; | ||
| 959 | |||
| 960 | raw_local_irq_save(flags); | ||
| 961 | rcp->qlen -= n; | ||
| 962 | raw_local_irq_restore(flags); | ||
| 963 | } | ||
| 964 | |||
| 965 | /* | ||
| 966 | * Dump statistics for TINY_RCU, such as they are. | ||
| 967 | */ | ||
| 968 | static int show_tiny_stats(struct seq_file *m, void *unused) | ||
| 969 | { | ||
| 970 | show_tiny_preempt_stats(m); | ||
| 971 | seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen); | ||
| 972 | seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen); | ||
| 973 | return 0; | ||
| 974 | } | ||
| 975 | |||
| 976 | static int show_tiny_stats_open(struct inode *inode, struct file *file) | ||
| 977 | { | ||
| 978 | return single_open(file, show_tiny_stats, NULL); | ||
| 979 | } | ||
| 980 | |||
| 981 | static const struct file_operations show_tiny_stats_fops = { | ||
| 982 | .owner = THIS_MODULE, | ||
| 983 | .open = show_tiny_stats_open, | ||
| 984 | .read = seq_read, | ||
| 985 | .llseek = seq_lseek, | ||
| 986 | .release = single_release, | ||
| 987 | }; | ||
| 988 | |||
| 989 | static struct dentry *rcudir; | ||
| 990 | |||
| 991 | static int __init rcutiny_trace_init(void) | ||
| 992 | { | ||
| 993 | struct dentry *retval; | ||
| 994 | |||
| 995 | rcudir = debugfs_create_dir("rcu", NULL); | ||
| 996 | if (!rcudir) | ||
| 997 | goto free_out; | ||
| 998 | retval = debugfs_create_file("rcudata", 0444, rcudir, | ||
| 999 | NULL, &show_tiny_stats_fops); | ||
| 1000 | if (!retval) | ||
| 1001 | goto free_out; | ||
| 1002 | return 0; | ||
| 1003 | free_out: | ||
| 1004 | debugfs_remove_recursive(rcudir); | ||
| 1005 | return 1; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static void __exit rcutiny_trace_cleanup(void) | ||
| 1009 | { | ||
| 1010 | debugfs_remove_recursive(rcudir); | ||
| 1011 | } | ||
| 1012 | |||
| 1013 | module_init(rcutiny_trace_init); | ||
| 1014 | module_exit(rcutiny_trace_cleanup); | ||
| 1015 | |||
| 1016 | MODULE_AUTHOR("Paul E. McKenney"); | ||
| 1017 | MODULE_DESCRIPTION("Read-Copy Update tracing for tiny implementation"); | ||
| 1018 | MODULE_LICENSE("GPL"); | ||
| 1019 | |||
| 1020 | #endif /* #ifdef CONFIG_RCU_TRACE */ | ||
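The tracing support added at the bottom of rcutiny_plugin.h is standard debugfs-plus-seq_file boilerplate: single_open() binds a show function, and a read-only file under /sys/kernel/debug/rcu dumps the counters. A pared-down sketch of the same wiring, exposing one hypothetical counter (API as of this kernel generation, where the debugfs creators returned NULL on failure):

```c
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/seq_file.h>

static long demo_qlen;		/* hypothetical statistic to expose */
static struct dentry *demo_dir;

static int demo_show(struct seq_file *m, void *unused)
{
	seq_printf(m, "demo: qlen: %ld\n", demo_qlen);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_show, NULL);
}

static const struct file_operations demo_fops = {
	.owner	 = THIS_MODULE,
	.open	 = demo_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};

static int __init demo_trace_init(void)
{
	demo_dir = debugfs_create_dir("demo", NULL);
	if (!demo_dir)
		return -ENOMEM;
	if (!debugfs_create_file("stats", 0444, demo_dir, NULL, &demo_fops)) {
		debugfs_remove_recursive(demo_dir);
		return -ENOMEM;
	}
	return 0;
}

static void __exit demo_trace_exit(void)
{
	debugfs_remove_recursive(demo_dir);
}

module_init(demo_trace_init);
module_exit(demo_trace_exit);
MODULE_LICENSE("GPL");
```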
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 9d8e8fb2515f..89613f97ff26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
| @@ -47,6 +47,7 @@ | |||
| 47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
| 48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
| 49 | #include <asm/byteorder.h> | 49 | #include <asm/byteorder.h> |
| 50 | #include <linux/sched.h> | ||
| 50 | 51 | ||
| 51 | MODULE_LICENSE("GPL"); | 52 | MODULE_LICENSE("GPL"); |
| 52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 53 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
| @@ -64,6 +65,9 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ | |||
| 64 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ | 65 | static int fqs_duration = 0; /* Duration of bursts (us), 0 to disable. */ |
| 65 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ | 66 | static int fqs_holdoff = 0; /* Hold time within burst (us). */ |
| 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 67 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
| 68 | static int test_boost = 1; /* Test RCU prio boost: 0=no, 1=maybe, 2=yes. */ | ||
| 69 | static int test_boost_interval = 7; /* Interval between boost tests, seconds. */ | ||
| 70 | static int test_boost_duration = 4; /* Duration of each boost test, seconds. */ | ||
| 67 | static char *torture_type = "rcu"; /* What RCU implementation to torture. */ | 71 | static char *torture_type = "rcu"; /* What RCU implementation to torture. */ |
| 68 | 72 | ||
| 69 | module_param(nreaders, int, 0444); | 73 | module_param(nreaders, int, 0444); |
| @@ -88,6 +92,12 @@ module_param(fqs_holdoff, int, 0444); | |||
| 88 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 92 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
| 89 | module_param(fqs_stutter, int, 0444); | 93 | module_param(fqs_stutter, int, 0444); |
| 90 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 94 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
| 95 | module_param(test_boost, int, 0444); | ||
| 96 | MODULE_PARM_DESC(test_boost, "Test RCU prio boost: 0=no, 1=maybe, 2=yes."); | ||
| 97 | module_param(test_boost_interval, int, 0444); | ||
| 98 | MODULE_PARM_DESC(test_boost_interval, "Interval between boost tests, seconds."); | ||
| 99 | module_param(test_boost_duration, int, 0444); | ||
| 100 | MODULE_PARM_DESC(test_boost_duration, "Duration of each boost test, seconds."); | ||
| 91 | module_param(torture_type, charp, 0444); | 101 | module_param(torture_type, charp, 0444); |
| 92 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); | 102 | MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); |
| 93 | 103 | ||
| @@ -109,6 +119,7 @@ static struct task_struct *stats_task; | |||
| 109 | static struct task_struct *shuffler_task; | 119 | static struct task_struct *shuffler_task; |
| 110 | static struct task_struct *stutter_task; | 120 | static struct task_struct *stutter_task; |
| 111 | static struct task_struct *fqs_task; | 121 | static struct task_struct *fqs_task; |
| 122 | static struct task_struct *boost_tasks[NR_CPUS]; | ||
| 112 | 123 | ||
| 113 | #define RCU_TORTURE_PIPE_LEN 10 | 124 | #define RCU_TORTURE_PIPE_LEN 10 |
| 114 | 125 | ||
| @@ -134,6 +145,12 @@ static atomic_t n_rcu_torture_alloc_fail; | |||
| 134 | static atomic_t n_rcu_torture_free; | 145 | static atomic_t n_rcu_torture_free; |
| 135 | static atomic_t n_rcu_torture_mberror; | 146 | static atomic_t n_rcu_torture_mberror; |
| 136 | static atomic_t n_rcu_torture_error; | 147 | static atomic_t n_rcu_torture_error; |
| 148 | static long n_rcu_torture_boost_ktrerror; | ||
| 149 | static long n_rcu_torture_boost_rterror; | ||
| 150 | static long n_rcu_torture_boost_allocerror; | ||
| 151 | static long n_rcu_torture_boost_afferror; | ||
| 152 | static long n_rcu_torture_boost_failure; | ||
| 153 | static long n_rcu_torture_boosts; | ||
| 137 | static long n_rcu_torture_timers; | 154 | static long n_rcu_torture_timers; |
| 138 | static struct list_head rcu_torture_removed; | 155 | static struct list_head rcu_torture_removed; |
| 139 | static cpumask_var_t shuffle_tmp_mask; | 156 | static cpumask_var_t shuffle_tmp_mask; |
| @@ -147,6 +164,16 @@ static int stutter_pause_test; | |||
| 147 | #endif | 164 | #endif |
| 148 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; | 165 | int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; |
| 149 | 166 | ||
| 167 | #ifdef CONFIG_RCU_BOOST | ||
| 168 | #define rcu_can_boost() 1 | ||
| 169 | #else /* #ifdef CONFIG_RCU_BOOST */ | ||
| 170 | #define rcu_can_boost() 0 | ||
| 171 | #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||
| 172 | |||
| 173 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | ||
| 174 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | ||
| 175 | /* and boost task create/destroy. */ | ||
| 176 | |||
| 150 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | 177 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ |
| 151 | 178 | ||
| 152 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ | 179 | #define FULLSTOP_DONTSTOP 0 /* Normal operation. */ |
| @@ -277,6 +304,7 @@ struct rcu_torture_ops { | |||
| 277 | void (*fqs)(void); | 304 | void (*fqs)(void); |
| 278 | int (*stats)(char *page); | 305 | int (*stats)(char *page); |
| 279 | int irq_capable; | 306 | int irq_capable; |
| 307 | int can_boost; | ||
| 280 | char *name; | 308 | char *name; |
| 281 | }; | 309 | }; |
| 282 | 310 | ||
| @@ -366,6 +394,7 @@ static struct rcu_torture_ops rcu_ops = { | |||
| 366 | .fqs = rcu_force_quiescent_state, | 394 | .fqs = rcu_force_quiescent_state, |
| 367 | .stats = NULL, | 395 | .stats = NULL, |
| 368 | .irq_capable = 1, | 396 | .irq_capable = 1, |
| 397 | .can_boost = rcu_can_boost(), | ||
| 369 | .name = "rcu" | 398 | .name = "rcu" |
| 370 | }; | 399 | }; |
| 371 | 400 | ||
| @@ -408,6 +437,7 @@ static struct rcu_torture_ops rcu_sync_ops = { | |||
| 408 | .fqs = rcu_force_quiescent_state, | 437 | .fqs = rcu_force_quiescent_state, |
| 409 | .stats = NULL, | 438 | .stats = NULL, |
| 410 | .irq_capable = 1, | 439 | .irq_capable = 1, |
| 440 | .can_boost = rcu_can_boost(), | ||
| 411 | .name = "rcu_sync" | 441 | .name = "rcu_sync" |
| 412 | }; | 442 | }; |
| 413 | 443 | ||
| @@ -424,6 +454,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { | |||
| 424 | .fqs = rcu_force_quiescent_state, | 454 | .fqs = rcu_force_quiescent_state, |
| 425 | .stats = NULL, | 455 | .stats = NULL, |
| 426 | .irq_capable = 1, | 456 | .irq_capable = 1, |
| 457 | .can_boost = rcu_can_boost(), | ||
| 427 | .name = "rcu_expedited" | 458 | .name = "rcu_expedited" |
| 428 | }; | 459 | }; |
| 429 | 460 | ||
| @@ -684,6 +715,110 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
| 684 | }; | 715 | }; |
| 685 | 716 | ||
| 686 | /* | 717 | /* |
| 718 | * RCU torture priority-boost testing. Runs one real-time thread per | ||
| 719 | * CPU for moderate bursts, repeatedly registering RCU callbacks and | ||
| 720 | * spinning waiting for them to be invoked. If a given callback takes | ||
| 721 | * too long to be invoked, we assume that priority inversion has occurred. | ||
| 722 | */ | ||
| 723 | |||
| 724 | struct rcu_boost_inflight { | ||
| 725 | struct rcu_head rcu; | ||
| 726 | int inflight; | ||
| 727 | }; | ||
| 728 | |||
| 729 | static void rcu_torture_boost_cb(struct rcu_head *head) | ||
| 730 | { | ||
| 731 | struct rcu_boost_inflight *rbip = | ||
| 732 | container_of(head, struct rcu_boost_inflight, rcu); | ||
| 733 | |||
| 734 | smp_mb(); /* Ensure RCU-core accesses precede clearing ->inflight */ | ||
| 735 | rbip->inflight = 0; | ||
| 736 | } | ||
| 737 | |||
| 738 | static int rcu_torture_boost(void *arg) | ||
| 739 | { | ||
| 740 | unsigned long call_rcu_time; | ||
| 741 | unsigned long endtime; | ||
| 742 | unsigned long oldstarttime; | ||
| 743 | struct rcu_boost_inflight rbi = { .inflight = 0 }; | ||
| 744 | struct sched_param sp; | ||
| 745 | |||
| 746 | VERBOSE_PRINTK_STRING("rcu_torture_boost started"); | ||
| 747 | |||
| 748 | /* Set real-time priority. */ | ||
| 749 | sp.sched_priority = 1; | ||
| 750 | if (sched_setscheduler(current, SCHED_FIFO, &sp) < 0) { | ||
| 751 | VERBOSE_PRINTK_STRING("rcu_torture_boost RT prio failed!"); | ||
| 752 | n_rcu_torture_boost_rterror++; | ||
| 753 | } | ||
| 754 | |||
| 755 | /* Each pass through the following loop does one boost-test cycle. */ | ||
| 756 | do { | ||
| 757 | /* Wait for the next test interval. */ | ||
| 758 | oldstarttime = boost_starttime; | ||
| 759 | while (jiffies - oldstarttime > ULONG_MAX / 2) { | ||
| 760 | schedule_timeout_uninterruptible(1); | ||
| 761 | rcu_stutter_wait("rcu_torture_boost"); | ||
| 762 | if (kthread_should_stop() || | ||
| 763 | fullstop != FULLSTOP_DONTSTOP) | ||
| 764 | goto checkwait; | ||
| 765 | } | ||
| 766 | |||
| 767 | /* Do one boost-test interval. */ | ||
| 768 | endtime = oldstarttime + test_boost_duration * HZ; | ||
| 769 | call_rcu_time = jiffies; | ||
| 770 | while (jiffies - endtime > ULONG_MAX / 2) { | ||
| 771 | /* If we don't have a callback in flight, post one. */ | ||
| 772 | if (!rbi.inflight) { | ||
| 773 | smp_mb(); /* RCU core before ->inflight = 1. */ | ||
| 774 | rbi.inflight = 1; | ||
| 775 | call_rcu(&rbi.rcu, rcu_torture_boost_cb); | ||
| 776 | if (jiffies - call_rcu_time > | ||
| 777 | test_boost_duration * HZ - HZ / 2) { | ||
| 778 | VERBOSE_PRINTK_STRING("rcu_torture_boost boosting failed"); | ||
| 779 | n_rcu_torture_boost_failure++; | ||
| 780 | } | ||
| 781 | call_rcu_time = jiffies; | ||
| 782 | } | ||
| 783 | cond_resched(); | ||
| 784 | rcu_stutter_wait("rcu_torture_boost"); | ||
| 785 | if (kthread_should_stop() || | ||
| 786 | fullstop != FULLSTOP_DONTSTOP) | ||
| 787 | goto checkwait; | ||
| 788 | } | ||
| 789 | |||
| 790 | /* | ||
| 791 | * Set the start time of the next test interval. | ||
| 792 | * Yes, this is vulnerable to long delays, but such | ||
| 793 | * delays simply cause a false negative for the next | ||
| 794 | * interval. Besides, we are running at RT priority, | ||
| 795 | * so delays should be relatively rare. | ||
| 796 | */ | ||
| 797 | while (oldstarttime == boost_starttime) { | ||
| 798 | if (mutex_trylock(&boost_mutex)) { | ||
| 799 | boost_starttime = jiffies + | ||
| 800 | test_boost_interval * HZ; | ||
| 801 | n_rcu_torture_boosts++; | ||
| 802 | mutex_unlock(&boost_mutex); | ||
| 803 | break; | ||
| 804 | } | ||
| 805 | schedule_timeout_uninterruptible(1); | ||
| 806 | } | ||
| 807 | |||
| 808 | /* Go do the stutter. */ | ||
| 809 | checkwait: rcu_stutter_wait("rcu_torture_boost"); | ||
| 810 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
| 811 | |||
| 812 | /* Clean up and exit. */ | ||
| 813 | VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); | ||
| 814 | rcutorture_shutdown_absorb("rcu_torture_boost"); | ||
| 815 | while (!kthread_should_stop() || rbi.inflight) | ||
| 816 | schedule_timeout_uninterruptible(1); | ||
| 817 | smp_mb(); /* order accesses to ->inflight before stack-frame death. */ | ||
| 818 | return 0; | ||
| 819 | } | ||
| 820 | |||
| 821 | /* | ||
| 687 | * RCU torture force-quiescent-state kthread. Repeatedly induces | 822 | * RCU torture force-quiescent-state kthread. Repeatedly induces |
| 688 | * bursts of calls to force_quiescent_state(), increasing the probability | 823 | * bursts of calls to force_quiescent_state(), increasing the probability |
| 689 | * of occurrence of some important types of race conditions. | 824 | * of occurrence of some important types of race conditions. |
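rcu_torture_boost()'s failure detector is the in-flight handshake defined above: ->inflight is set before call_rcu() posts the probe and cleared by the callback once a grace period has elapsed, with smp_mb() on each side so the flag cannot be reordered against the surrounding RCU-core accesses. A condensed restatement of just that handshake (shortened names, same shape as the patch's code):

```c
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/types.h>

struct probe {
	struct rcu_head rcu;
	int inflight;
};

static void probe_cb(struct rcu_head *head)
{
	struct probe *p = container_of(head, struct probe, rcu);

	smp_mb();		/* RCU-core accesses precede clearing ->inflight. */
	p->inflight = 0;	/* grace period done; poster may reuse p */
}

/* Poster side: queue a new probe unless one is still pending. */
static bool probe_post(struct probe *p)
{
	if (p->inflight)
		return false;	/* previous grace period not yet over */
	smp_mb();		/* prior accesses precede ->inflight = 1. */
	p->inflight = 1;
	call_rcu(&p->rcu, probe_cb);
	return true;
}
```

The torture thread times the interval between successful posts; a probe that stays in flight for most of a boost-test interval is charged as a boost failure, on the theory that a preempted reader must be holding up the grace period.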
| @@ -933,7 +1068,8 @@ rcu_torture_printk(char *page) | |||
| 933 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); | 1068 | cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); |
| 934 | cnt += sprintf(&page[cnt], | 1069 | cnt += sprintf(&page[cnt], |
| 935 | "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " | 1070 | "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " |
| 936 | "rtmbe: %d nt: %ld", | 1071 | "rtmbe: %d rtbke: %ld rtbre: %ld rtbae: %ld rtbafe: %ld " |
| 1072 | "rtbf: %ld rtb: %ld nt: %ld", | ||
| 937 | rcu_torture_current, | 1073 | rcu_torture_current, |
| 938 | rcu_torture_current_version, | 1074 | rcu_torture_current_version, |
| 939 | list_empty(&rcu_torture_freelist), | 1075 | list_empty(&rcu_torture_freelist), |
| @@ -941,8 +1077,19 @@ rcu_torture_printk(char *page) | |||
| 941 | atomic_read(&n_rcu_torture_alloc_fail), | 1077 | atomic_read(&n_rcu_torture_alloc_fail), |
| 942 | atomic_read(&n_rcu_torture_free), | 1078 | atomic_read(&n_rcu_torture_free), |
| 943 | atomic_read(&n_rcu_torture_mberror), | 1079 | atomic_read(&n_rcu_torture_mberror), |
| 1080 | n_rcu_torture_boost_ktrerror, | ||
| 1081 | n_rcu_torture_boost_rterror, | ||
| 1082 | n_rcu_torture_boost_allocerror, | ||
| 1083 | n_rcu_torture_boost_afferror, | ||
| 1084 | n_rcu_torture_boost_failure, | ||
| 1085 | n_rcu_torture_boosts, | ||
| 944 | n_rcu_torture_timers); | 1086 | n_rcu_torture_timers); |
| 945 | if (atomic_read(&n_rcu_torture_mberror) != 0) | 1087 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
| 1088 | n_rcu_torture_boost_ktrerror != 0 || | ||
| 1089 | n_rcu_torture_boost_rterror != 0 || | ||
| 1090 | n_rcu_torture_boost_allocerror != 0 || | ||
| 1091 | n_rcu_torture_boost_afferror != 0 || | ||
| 1092 | n_rcu_torture_boost_failure != 0) | ||
| 946 | cnt += sprintf(&page[cnt], " !!!"); | 1093 | cnt += sprintf(&page[cnt], " !!!"); |
| 947 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | 1094 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); |
| 948 | if (i > 1) { | 1095 | if (i > 1) { |
| @@ -1094,22 +1241,91 @@ rcu_torture_stutter(void *arg) | |||
| 1094 | } | 1241 | } |
| 1095 | 1242 | ||
| 1096 | static inline void | 1243 | static inline void |
| 1097 | rcu_torture_print_module_parms(char *tag) | 1244 | rcu_torture_print_module_parms(struct rcu_torture_ops *cur_ops, char *tag) |
| 1098 | { | 1245 | { |
| 1099 | printk(KERN_ALERT "%s" TORTURE_FLAG | 1246 | printk(KERN_ALERT "%s" TORTURE_FLAG |
| 1100 | "--- %s: nreaders=%d nfakewriters=%d " | 1247 | "--- %s: nreaders=%d nfakewriters=%d " |
| 1101 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " | 1248 | "stat_interval=%d verbose=%d test_no_idle_hz=%d " |
| 1102 | "shuffle_interval=%d stutter=%d irqreader=%d " | 1249 | "shuffle_interval=%d stutter=%d irqreader=%d " |
| 1103 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d\n", | 1250 | "fqs_duration=%d fqs_holdoff=%d fqs_stutter=%d " |
| 1251 | "test_boost=%d/%d test_boost_interval=%d " | ||
| 1252 | "test_boost_duration=%d\n", | ||
| 1104 | torture_type, tag, nrealreaders, nfakewriters, | 1253 | torture_type, tag, nrealreaders, nfakewriters, |
| 1105 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, | 1254 | stat_interval, verbose, test_no_idle_hz, shuffle_interval, |
| 1106 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter); | 1255 | stutter, irqreader, fqs_duration, fqs_holdoff, fqs_stutter, |
| 1256 | test_boost, cur_ops->can_boost, | ||
| 1257 | test_boost_interval, test_boost_duration); | ||
| 1107 | } | 1258 | } |
| 1108 | 1259 | ||
| 1109 | static struct notifier_block rcutorture_nb = { | 1260 | static struct notifier_block rcutorture_shutdown_nb = { |
| 1110 | .notifier_call = rcutorture_shutdown_notify, | 1261 | .notifier_call = rcutorture_shutdown_notify, |
| 1111 | }; | 1262 | }; |
| 1112 | 1263 | ||
| 1264 | static void rcutorture_booster_cleanup(int cpu) | ||
| 1265 | { | ||
| 1266 | struct task_struct *t; | ||
| 1267 | |||
| 1268 | if (boost_tasks[cpu] == NULL) | ||
| 1269 | return; | ||
| 1270 | mutex_lock(&boost_mutex); | ||
| 1271 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_boost task"); | ||
| 1272 | t = boost_tasks[cpu]; | ||
| 1273 | boost_tasks[cpu] = NULL; | ||
| 1274 | mutex_unlock(&boost_mutex); | ||
| 1275 | |||
| 1276 | /* This must be outside of the mutex, otherwise deadlock! */ | ||
| 1277 | kthread_stop(t); | ||
| 1278 | } | ||
| 1279 | |||
| 1280 | static int rcutorture_booster_init(int cpu) | ||
| 1281 | { | ||
| 1282 | int retval; | ||
| 1283 | |||
| 1284 | if (boost_tasks[cpu] != NULL) | ||
| 1285 | return 0; /* Already created, nothing more to do. */ | ||
| 1286 | |||
| 1287 | /* Don't allow time recalculation while creating a new task. */ | ||
| 1288 | mutex_lock(&boost_mutex); | ||
| 1289 | VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); | ||
| 1290 | boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, | ||
| 1291 | "rcu_torture_boost"); | ||
| 1292 | if (IS_ERR(boost_tasks[cpu])) { | ||
| 1293 | retval = PTR_ERR(boost_tasks[cpu]); | ||
| 1294 | VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); | ||
| 1295 | n_rcu_torture_boost_ktrerror++; | ||
| 1296 | boost_tasks[cpu] = NULL; | ||
| 1297 | mutex_unlock(&boost_mutex); | ||
| 1298 | return retval; | ||
| 1299 | } | ||
| 1300 | kthread_bind(boost_tasks[cpu], cpu); | ||
| 1301 | wake_up_process(boost_tasks[cpu]); | ||
| 1302 | mutex_unlock(&boost_mutex); | ||
| 1303 | return 0; | ||
| 1304 | } | ||
| 1305 | |||
| 1306 | static int rcutorture_cpu_notify(struct notifier_block *self, | ||
| 1307 | unsigned long action, void *hcpu) | ||
| 1308 | { | ||
| 1309 | long cpu = (long)hcpu; | ||
| 1310 | |||
| 1311 | switch (action) { | ||
| 1312 | case CPU_ONLINE: | ||
| 1313 | case CPU_DOWN_FAILED: | ||
| 1314 | (void)rcutorture_booster_init(cpu); | ||
| 1315 | break; | ||
| 1316 | case CPU_DOWN_PREPARE: | ||
| 1317 | rcutorture_booster_cleanup(cpu); | ||
| 1318 | break; | ||
| 1319 | default: | ||
| 1320 | break; | ||
| 1321 | } | ||
| 1322 | return NOTIFY_OK; | ||
| 1323 | } | ||
| 1324 | |||
| 1325 | static struct notifier_block rcutorture_cpu_nb = { | ||
| 1326 | .notifier_call = rcutorture_cpu_notify, | ||
| 1327 | }; | ||
| 1328 | |||
| 1113 | static void | 1329 | static void |
| 1114 | rcu_torture_cleanup(void) | 1330 | rcu_torture_cleanup(void) |
| 1115 | { | 1331 | { |
| @@ -1127,7 +1343,7 @@ rcu_torture_cleanup(void) | |||
| 1127 | } | 1343 | } |
| 1128 | fullstop = FULLSTOP_RMMOD; | 1344 | fullstop = FULLSTOP_RMMOD; |
| 1129 | mutex_unlock(&fullstop_mutex); | 1345 | mutex_unlock(&fullstop_mutex); |
| 1130 | unregister_reboot_notifier(&rcutorture_nb); | 1346 | unregister_reboot_notifier(&rcutorture_shutdown_nb); |
| 1131 | if (stutter_task) { | 1347 | if (stutter_task) { |
| 1132 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); | 1348 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); |
| 1133 | kthread_stop(stutter_task); | 1349 | kthread_stop(stutter_task); |
| @@ -1184,6 +1400,12 @@ rcu_torture_cleanup(void) | |||
| 1184 | kthread_stop(fqs_task); | 1400 | kthread_stop(fqs_task); |
| 1185 | } | 1401 | } |
| 1186 | fqs_task = NULL; | 1402 | fqs_task = NULL; |
| 1403 | if ((test_boost == 1 && cur_ops->can_boost) || | ||
| 1404 | test_boost == 2) { | ||
| 1405 | unregister_cpu_notifier(&rcutorture_cpu_nb); | ||
| 1406 | for_each_possible_cpu(i) | ||
| 1407 | rcutorture_booster_cleanup(i); | ||
| 1408 | } | ||
| 1187 | 1409 | ||
| 1188 | /* Wait for all RCU callbacks to fire. */ | 1410 | /* Wait for all RCU callbacks to fire. */ |
| 1189 | 1411 | ||
| @@ -1195,9 +1417,9 @@ rcu_torture_cleanup(void) | |||
| 1195 | if (cur_ops->cleanup) | 1417 | if (cur_ops->cleanup) |
| 1196 | cur_ops->cleanup(); | 1418 | cur_ops->cleanup(); |
| 1197 | if (atomic_read(&n_rcu_torture_error)) | 1419 | if (atomic_read(&n_rcu_torture_error)) |
| 1198 | rcu_torture_print_module_parms("End of test: FAILURE"); | 1420 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); |
| 1199 | else | 1421 | else |
| 1200 | rcu_torture_print_module_parms("End of test: SUCCESS"); | 1422 | rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); |
| 1201 | } | 1423 | } |
| 1202 | 1424 | ||
| 1203 | static int __init | 1425 | static int __init |
| @@ -1242,7 +1464,7 @@ rcu_torture_init(void) | |||
| 1242 | nrealreaders = nreaders; | 1464 | nrealreaders = nreaders; |
| 1243 | else | 1465 | else |
| 1244 | nrealreaders = 2 * num_online_cpus(); | 1466 | nrealreaders = 2 * num_online_cpus(); |
| 1245 | rcu_torture_print_module_parms("Start of test"); | 1467 | rcu_torture_print_module_parms(cur_ops, "Start of test"); |
| 1246 | fullstop = FULLSTOP_DONTSTOP; | 1468 | fullstop = FULLSTOP_DONTSTOP; |
| 1247 | 1469 | ||
| 1248 | /* Set up the freelist. */ | 1470 | /* Set up the freelist. */ |
| @@ -1263,6 +1485,12 @@ rcu_torture_init(void) | |||
| 1263 | atomic_set(&n_rcu_torture_free, 0); | 1485 | atomic_set(&n_rcu_torture_free, 0); |
| 1264 | atomic_set(&n_rcu_torture_mberror, 0); | 1486 | atomic_set(&n_rcu_torture_mberror, 0); |
| 1265 | atomic_set(&n_rcu_torture_error, 0); | 1487 | atomic_set(&n_rcu_torture_error, 0); |
| 1488 | n_rcu_torture_boost_ktrerror = 0; | ||
| 1489 | n_rcu_torture_boost_rterror = 0; | ||
| 1490 | n_rcu_torture_boost_allocerror = 0; | ||
| 1491 | n_rcu_torture_boost_afferror = 0; | ||
| 1492 | n_rcu_torture_boost_failure = 0; | ||
| 1493 | n_rcu_torture_boosts = 0; | ||
| 1266 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) | 1494 | for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) |
| 1267 | atomic_set(&rcu_torture_wcount[i], 0); | 1495 | atomic_set(&rcu_torture_wcount[i], 0); |
| 1268 | for_each_possible_cpu(cpu) { | 1496 | for_each_possible_cpu(cpu) { |
| @@ -1376,7 +1604,27 @@ rcu_torture_init(void) | |||
| 1376 | goto unwind; | 1604 | goto unwind; |
| 1377 | } | 1605 | } |
| 1378 | } | 1606 | } |
| 1379 | register_reboot_notifier(&rcutorture_nb); | 1607 | if (test_boost_interval < 1) |
| 1608 | test_boost_interval = 1; | ||
| 1609 | if (test_boost_duration < 2) | ||
| 1610 | test_boost_duration = 2; | ||
| 1611 | if ((test_boost == 1 && cur_ops->can_boost) || | ||
| 1612 | test_boost == 2) { | ||
| 1613 | int retval; | ||
| 1614 | |||
| 1615 | boost_starttime = jiffies + test_boost_interval * HZ; | ||
| 1616 | register_cpu_notifier(&rcutorture_cpu_nb); | ||
| 1617 | for_each_possible_cpu(i) { | ||
| 1618 | if (cpu_is_offline(i)) | ||
| 1619 | continue; /* Heuristic: CPU can go offline. */ | ||
| 1620 | retval = rcutorture_booster_init(i); | ||
| 1621 | if (retval < 0) { | ||
| 1622 | firsterr = retval; | ||
| 1623 | goto unwind; | ||
| 1624 | } | ||
| 1625 | } | ||
| 1626 | } | ||
| 1627 | register_reboot_notifier(&rcutorture_shutdown_nb); | ||
| 1380 | mutex_unlock(&fullstop_mutex); | 1628 | mutex_unlock(&fullstop_mutex); |
| 1381 | return 0; | 1629 | return 0; |
| 1382 | 1630 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index ccdc04c47981..d0ddfea6579d 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -67,9 +67,6 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; | |||
| 67 | .gpnum = -300, \ | 67 | .gpnum = -300, \ |
| 68 | .completed = -300, \ | 68 | .completed = -300, \ |
| 69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ | 69 | .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ |
| 70 | .orphan_cbs_list = NULL, \ | ||
| 71 | .orphan_cbs_tail = &structname.orphan_cbs_list, \ | ||
| 72 | .orphan_qlen = 0, \ | ||
| 73 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ | 70 | .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ |
| 74 | .n_force_qs = 0, \ | 71 | .n_force_qs = 0, \ |
| 75 | .n_force_qs_ngp = 0, \ | 72 | .n_force_qs_ngp = 0, \ |
| @@ -620,9 +617,17 @@ static void __init check_cpu_stall_init(void) | |||
| 620 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) | 617 | static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) |
| 621 | { | 618 | { |
| 622 | if (rdp->gpnum != rnp->gpnum) { | 619 | if (rdp->gpnum != rnp->gpnum) { |
| 623 | rdp->qs_pending = 1; | 620 | /* |
| 624 | rdp->passed_quiesc = 0; | 621 | * If the current grace period is waiting for this CPU, |
| 622 | * set up to detect a quiescent state, otherwise don't | ||
| 623 | * go looking for one. | ||
| 624 | */ | ||
| 625 | rdp->gpnum = rnp->gpnum; | 625 | rdp->gpnum = rnp->gpnum; |
| 626 | if (rnp->qsmask & rdp->grpmask) { | ||
| 627 | rdp->qs_pending = 1; | ||
| 628 | rdp->passed_quiesc = 0; | ||
| 629 | } else | ||
| 630 | rdp->qs_pending = 0; | ||
| 626 | } | 631 | } |
| 627 | } | 632 | } |
| 628 | 633 | ||
| @@ -681,6 +686,24 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat | |||
| 681 | 686 | ||
| 682 | /* Remember that we saw this grace-period completion. */ | 687 | /* Remember that we saw this grace-period completion. */ |
| 683 | rdp->completed = rnp->completed; | 688 | rdp->completed = rnp->completed; |
| 689 | |||
| 690 | /* | ||
| 691 | * If we were in an extended quiescent state, we may have | ||
| 692 | * missed some grace periods that other CPUs handled on | ||
| 693 | * our behalf. Catch up with this state to avoid noting | ||
| 694 | * spurious new grace periods. If another grace period | ||
| 695 | * has started, then rnp->gpnum will have advanced, so | ||
| 696 | * we will detect this later on. | ||
| 697 | */ | ||
| 698 | if (ULONG_CMP_LT(rdp->gpnum, rdp->completed)) | ||
| 699 | rdp->gpnum = rdp->completed; | ||
| 700 | |||
| 701 | /* | ||
| 702 | * If RCU does not need a quiescent state from this CPU, | ||
| 703 | * then make sure that this CPU doesn't go looking for one. | ||
| 704 | */ | ||
| 705 | if ((rnp->qsmask & rdp->grpmask) == 0) | ||
| 706 | rdp->qs_pending = 0; | ||
| 684 | } | 707 | } |
| 685 | } | 708 | } |
| 686 | 709 | ||
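The catch-up above leans on ULONG_CMP_LT(), the kernel's wraparound-safe ordering test for free-running counters such as ->gpnum and ->completed; a plain `<` would misorder values once a counter wraps. A minimal user-space sketch of the idiom (the macro body mirrors the kernel's rcupdate.h definition; the demo around it is illustrative only):

```c
#include <limits.h>
#include <stdio.h>

/* "a < b" for free-running unsigned counters, tolerant of wraparound. */
#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	unsigned long completed = 5;
	unsigned long gpnum = ULONG_MAX - 2;  /* wrapped: really "behind" 5 */

	/* Plain "<" calls gpnum huge; the modular compare sees it as behind. */
	printf("plain: %d  modular: %d\n",
	       gpnum < completed, ULONG_CMP_LT(gpnum, completed));
	return 0;  /* prints: plain: 0  modular: 1 */
}
```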
| @@ -984,53 +1007,31 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 984 | #ifdef CONFIG_HOTPLUG_CPU | 1007 | #ifdef CONFIG_HOTPLUG_CPU |
| 985 | 1008 | ||
| 986 | /* | 1009 | /* |
| 987 | * Move a dying CPU's RCU callbacks to the ->orphan_cbs_list for the | 1010 | * Move a dying CPU's RCU callbacks to an online CPU's callback list. |
| 988 | * specified flavor of RCU. The callbacks will be adopted by the next | 1011 | * Synchronization is not required because this function executes |
| 989 | * _rcu_barrier() invocation or by the CPU_DEAD notifier, whichever | 1012 | * in stop_machine() context. |
| 990 | * comes first. Because this is invoked from the CPU_DYING notifier, | ||
| 991 | * irqs are already disabled. | ||
| 992 | */ | 1013 | */ |
| 993 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 1014 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
| 994 | { | 1015 | { |
| 995 | int i; | 1016 | int i; |
| 1017 | /* The dying CPU has already been cleared from cpu_online_mask. */ | ||
| 1018 | int receive_cpu = cpumask_any(cpu_online_mask); | ||
| 996 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); | 1019 | struct rcu_data *rdp = this_cpu_ptr(rsp->rda); |
| 1020 | struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu); | ||
| 997 | 1021 | ||
| 998 | if (rdp->nxtlist == NULL) | 1022 | if (rdp->nxtlist == NULL) |
| 999 | return; /* irqs disabled, so comparison is stable. */ | 1023 | return; /* irqs disabled, so comparison is stable. */ |
| 1000 | raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */ | 1024 | |
| 1001 | *rsp->orphan_cbs_tail = rdp->nxtlist; | 1025 | *receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist; |
| 1002 | rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; | 1026 | receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; |
| 1027 | receive_rdp->qlen += rdp->qlen; | ||
| 1028 | receive_rdp->n_cbs_adopted += rdp->qlen; | ||
| 1029 | rdp->n_cbs_orphaned += rdp->qlen; | ||
| 1030 | |||
| 1003 | rdp->nxtlist = NULL; | 1031 | rdp->nxtlist = NULL; |
| 1004 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1032 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
| 1005 | rdp->nxttail[i] = &rdp->nxtlist; | 1033 | rdp->nxttail[i] = &rdp->nxtlist; |
| 1006 | rsp->orphan_qlen += rdp->qlen; | ||
| 1007 | rdp->n_cbs_orphaned += rdp->qlen; | ||
| 1008 | rdp->qlen = 0; | 1034 | rdp->qlen = 0; |
| 1009 | raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ | ||
| 1010 | } | ||
| 1011 | |||
| 1012 | /* | ||
| 1013 | * Adopt previously orphaned RCU callbacks. | ||
| 1014 | */ | ||
| 1015 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
| 1016 | { | ||
| 1017 | unsigned long flags; | ||
| 1018 | struct rcu_data *rdp; | ||
| 1019 | |||
| 1020 | raw_spin_lock_irqsave(&rsp->onofflock, flags); | ||
| 1021 | rdp = this_cpu_ptr(rsp->rda); | ||
| 1022 | if (rsp->orphan_cbs_list == NULL) { | ||
| 1023 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
| 1024 | return; | ||
| 1025 | } | ||
| 1026 | *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; | ||
| 1027 | rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_tail; | ||
| 1028 | rdp->qlen += rsp->orphan_qlen; | ||
| 1029 | rdp->n_cbs_adopted += rsp->orphan_qlen; | ||
| 1030 | rsp->orphan_cbs_list = NULL; | ||
| 1031 | rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; | ||
| 1032 | rsp->orphan_qlen = 0; | ||
| 1033 | raw_spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
| 1034 | } | 1035 | } |
| 1035 | 1036 | ||
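rcu_send_cbs_to_online() above is an instance of the tail-pointer list idiom: keeping a pointer to the last ->next slot makes appending one element, and splicing an entire donor list, O(1) with no traversal. A standalone sketch under illustrative names (none of these types are kernel API):

```c
#include <stdio.h>

/* Minimal stand-in for rcu_head: a singly linked node. */
struct node { struct node *next; int id; };

/* A list keeps its head and a pointer to the last ->next slot. */
struct list { struct node *head; struct node **tail; };

static void list_init(struct list *l) { l->head = NULL; l->tail = &l->head; }

static void list_add(struct list *l, struct node *n)
{
	n->next = NULL;
	*l->tail = n;        /* link n after the current last element */
	l->tail = &n->next;  /* the tail slot is now n's ->next field */
}

/* O(1) splice of src onto dst, the move rcu_send_cbs_to_online() makes. */
static void splice_tail(struct list *dst, struct list *src)
{
	if (src->head == NULL)
		return;
	*dst->tail = src->head;
	dst->tail = src->tail;
	list_init(src);      /* donor list ends up empty */
}

int main(void)
{
	struct node n[4];
	struct list a, b;
	int i;

	list_init(&a);
	list_init(&b);
	for (i = 0; i < 4; i++) {
		n[i].id = i;
		list_add(i < 2 ? &a : &b, &n[i]);
	}
	splice_tail(&a, &b);
	for (struct node *p = a.head; p != NULL; p = p->next)
		printf("%d ", p->id);  /* prints: 0 1 2 3 */
	printf("\n");
	return 0;
}
```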
| 1036 | /* | 1037 | /* |
| @@ -1081,8 +1082,6 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) | |||
| 1081 | raw_spin_unlock_irqrestore(&rnp->lock, flags); | 1082 | raw_spin_unlock_irqrestore(&rnp->lock, flags); |
| 1082 | if (need_report & RCU_OFL_TASKS_EXP_GP) | 1083 | if (need_report & RCU_OFL_TASKS_EXP_GP) |
| 1083 | rcu_report_exp_rnp(rsp, rnp); | 1084 | rcu_report_exp_rnp(rsp, rnp); |
| 1084 | |||
| 1085 | rcu_adopt_orphan_cbs(rsp); | ||
| 1086 | } | 1085 | } |
| 1087 | 1086 | ||
| 1088 | /* | 1087 | /* |
| @@ -1100,11 +1099,7 @@ static void rcu_offline_cpu(int cpu) | |||
| 1100 | 1099 | ||
| 1101 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1100 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 1102 | 1101 | ||
| 1103 | static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) | 1102 | static void rcu_send_cbs_to_online(struct rcu_state *rsp) |
| 1104 | { | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) | ||
| 1108 | { | 1103 | { |
| 1109 | } | 1104 | } |
| 1110 | 1105 | ||
| @@ -1440,22 +1435,11 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1440 | */ | 1435 | */ |
| 1441 | local_irq_save(flags); | 1436 | local_irq_save(flags); |
| 1442 | rdp = this_cpu_ptr(rsp->rda); | 1437 | rdp = this_cpu_ptr(rsp->rda); |
| 1443 | rcu_process_gp_end(rsp, rdp); | ||
| 1444 | check_for_new_grace_period(rsp, rdp); | ||
| 1445 | 1438 | ||
| 1446 | /* Add the callback to our list. */ | 1439 | /* Add the callback to our list. */ |
| 1447 | *rdp->nxttail[RCU_NEXT_TAIL] = head; | 1440 | *rdp->nxttail[RCU_NEXT_TAIL] = head; |
| 1448 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; | 1441 | rdp->nxttail[RCU_NEXT_TAIL] = &head->next; |
| 1449 | 1442 | ||
| 1450 | /* Start a new grace period if one not already started. */ | ||
| 1451 | if (!rcu_gp_in_progress(rsp)) { | ||
| 1452 | unsigned long nestflag; | ||
| 1453 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
| 1454 | |||
| 1455 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
| 1456 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | ||
| 1457 | } | ||
| 1458 | |||
| 1459 | /* | 1443 | /* |
| 1460 | * Force the grace period if too many callbacks or too long waiting. | 1444 | * Force the grace period if too many callbacks or too long waiting. |
| 1461 | * Enforce hysteresis, and don't invoke force_quiescent_state() | 1445 | * Enforce hysteresis, and don't invoke force_quiescent_state() |
| @@ -1464,12 +1448,27 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1464 | * is the only one waiting for a grace period to complete. | 1448 | * is the only one waiting for a grace period to complete. |
| 1465 | */ | 1449 | */ |
| 1466 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | 1450 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { |
| 1467 | rdp->blimit = LONG_MAX; | 1451 | |
| 1468 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | 1452 | /* Are we ignoring a completed grace period? */ |
| 1469 | *rdp->nxttail[RCU_DONE_TAIL] != head) | 1453 | rcu_process_gp_end(rsp, rdp); |
| 1470 | force_quiescent_state(rsp, 0); | 1454 | check_for_new_grace_period(rsp, rdp); |
| 1471 | rdp->n_force_qs_snap = rsp->n_force_qs; | 1455 | |
| 1472 | rdp->qlen_last_fqs_check = rdp->qlen; | 1456 | /* Start a new grace period if one not already started. */ |
| 1457 | if (!rcu_gp_in_progress(rsp)) { | ||
| 1458 | unsigned long nestflag; | ||
| 1459 | struct rcu_node *rnp_root = rcu_get_root(rsp); | ||
| 1460 | |||
| 1461 | raw_spin_lock_irqsave(&rnp_root->lock, nestflag); | ||
| 1462 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock */ | ||
| 1463 | } else { | ||
| 1464 | /* Give the grace period a kick. */ | ||
| 1465 | rdp->blimit = LONG_MAX; | ||
| 1466 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | ||
| 1467 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
| 1468 | force_quiescent_state(rsp, 0); | ||
| 1469 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1470 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
| 1471 | } | ||
| 1473 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) | 1472 | } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) |
| 1474 | force_quiescent_state(rsp, 1); | 1473 | force_quiescent_state(rsp, 1); |
| 1475 | local_irq_restore(flags); | 1474 | local_irq_restore(flags); |
| @@ -1699,13 +1698,12 @@ static void _rcu_barrier(struct rcu_state *rsp, | |||
| 1699 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU | 1698 | * decrement rcu_barrier_cpu_count -- otherwise the first CPU |
| 1700 | * might complete its grace period before all of the other CPUs | 1699 | * might complete its grace period before all of the other CPUs |
| 1701 | * did their increment, causing this function to return too | 1700 | * did their increment, causing this function to return too |
| 1702 | * early. | 1701 | * early. Note that on_each_cpu() disables irqs, which prevents |
| 1702 | * any CPUs from coming online or going offline until each online | ||
| 1703 | * CPU has queued its RCU-barrier callback. | ||
| 1703 | */ | 1704 | */ |
| 1704 | atomic_set(&rcu_barrier_cpu_count, 1); | 1705 | atomic_set(&rcu_barrier_cpu_count, 1); |
| 1705 | preempt_disable(); /* stop CPU_DYING from filling orphan_cbs_list */ | ||
| 1706 | rcu_adopt_orphan_cbs(rsp); | ||
| 1707 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); | 1706 | on_each_cpu(rcu_barrier_func, (void *)call_rcu_func, 1); |
| 1708 | preempt_enable(); /* CPU_DYING can again fill orphan_cbs_list */ | ||
| 1709 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) | 1707 | if (atomic_dec_and_test(&rcu_barrier_cpu_count)) |
| 1710 | complete(&rcu_barrier_completion); | 1708 | complete(&rcu_barrier_completion); |
| 1711 | wait_for_completion(&rcu_barrier_completion); | 1709 | wait_for_completion(&rcu_barrier_completion); |
| @@ -1831,18 +1829,13 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
| 1831 | case CPU_DYING: | 1829 | case CPU_DYING: |
| 1832 | case CPU_DYING_FROZEN: | 1830 | case CPU_DYING_FROZEN: |
| 1833 | /* | 1831 | /* |
| 1834 | * preempt_disable() in _rcu_barrier() prevents stop_machine(), | 1832 | * The whole machine is "stopped" except this CPU, so we can |
| 1835 | * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" | 1833 | * touch any data without introducing corruption. We send the |
| 1836 | * returns, all online cpus have queued rcu_barrier_func(). | 1834 | * dying CPU's callbacks to an arbitrarily chosen online CPU. |
| 1837 | * The dying CPU clears its cpu_online_mask bit and | ||
| 1838 | * moves all of its RCU callbacks to ->orphan_cbs_list | ||
| 1839 | * in the context of stop_machine(), so subsequent calls | ||
| 1840 | * to _rcu_barrier() will adopt these callbacks and only | ||
| 1841 | * then queue rcu_barrier_func() on all remaining CPUs. | ||
| 1842 | */ | 1835 | */ |
| 1843 | rcu_send_cbs_to_orphanage(&rcu_bh_state); | 1836 | rcu_send_cbs_to_online(&rcu_bh_state); |
| 1844 | rcu_send_cbs_to_orphanage(&rcu_sched_state); | 1837 | rcu_send_cbs_to_online(&rcu_sched_state); |
| 1845 | rcu_preempt_send_cbs_to_orphanage(); | 1838 | rcu_preempt_send_cbs_to_online(); |
| 1846 | break; | 1839 | break; |
| 1847 | case CPU_DEAD: | 1840 | case CPU_DEAD: |
| 1848 | case CPU_DEAD_FROZEN: | 1841 | case CPU_DEAD_FROZEN: |
| @@ -1880,8 +1873,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 1880 | { | 1873 | { |
| 1881 | int i; | 1874 | int i; |
| 1882 | 1875 | ||
| 1883 | for (i = NUM_RCU_LVLS - 1; i >= 0; i--) | 1876 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
| 1884 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 1877 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
| 1878 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | ||
| 1885 | } | 1879 | } |
| 1886 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 1880 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
| 1887 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 1881 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 91d4170c5c13..e8f057e44e3e 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -31,46 +31,51 @@ | |||
| 31 | /* | 31 | /* |
| 32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. | 32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. |
| 33 | * In theory, it should be possible to add more levels straightforwardly. | 33 | * In theory, it should be possible to add more levels straightforwardly. |
| 34 | * In practice, this has not been tested, so there is probably some | 34 | * In practice, this did work well going from three levels to four. |
| 35 | * bug somewhere. | 35 | * Of course, your mileage may vary. |
| 36 | */ | 36 | */ |
| 37 | #define MAX_RCU_LVLS 4 | 37 | #define MAX_RCU_LVLS 4 |
| 38 | #define RCU_FANOUT (CONFIG_RCU_FANOUT) | 38 | #if CONFIG_RCU_FANOUT > 16 |
| 39 | #define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) | 39 | #define RCU_FANOUT_LEAF 16 |
| 40 | #define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) | 40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ |
| 41 | #define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) | 41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) |
| 42 | 42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ | |
| 43 | #if NR_CPUS <= RCU_FANOUT | 43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) |
| 44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) | ||
| 45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) | ||
| 46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | ||
| 47 | |||
| 48 | #if NR_CPUS <= RCU_FANOUT_1 | ||
| 44 | # define NUM_RCU_LVLS 1 | 49 | # define NUM_RCU_LVLS 1 |
| 45 | # define NUM_RCU_LVL_0 1 | 50 | # define NUM_RCU_LVL_0 1 |
| 46 | # define NUM_RCU_LVL_1 (NR_CPUS) | 51 | # define NUM_RCU_LVL_1 (NR_CPUS) |
| 47 | # define NUM_RCU_LVL_2 0 | 52 | # define NUM_RCU_LVL_2 0 |
| 48 | # define NUM_RCU_LVL_3 0 | 53 | # define NUM_RCU_LVL_3 0 |
| 49 | # define NUM_RCU_LVL_4 0 | 54 | # define NUM_RCU_LVL_4 0 |
| 50 | #elif NR_CPUS <= RCU_FANOUT_SQ | 55 | #elif NR_CPUS <= RCU_FANOUT_2 |
| 51 | # define NUM_RCU_LVLS 2 | 56 | # define NUM_RCU_LVLS 2 |
| 52 | # define NUM_RCU_LVL_0 1 | 57 | # define NUM_RCU_LVL_0 1 |
| 53 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 58 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
| 54 | # define NUM_RCU_LVL_2 (NR_CPUS) | 59 | # define NUM_RCU_LVL_2 (NR_CPUS) |
| 55 | # define NUM_RCU_LVL_3 0 | 60 | # define NUM_RCU_LVL_3 0 |
| 56 | # define NUM_RCU_LVL_4 0 | 61 | # define NUM_RCU_LVL_4 0 |
| 57 | #elif NR_CPUS <= RCU_FANOUT_CUBE | 62 | #elif NR_CPUS <= RCU_FANOUT_3 |
| 58 | # define NUM_RCU_LVLS 3 | 63 | # define NUM_RCU_LVLS 3 |
| 59 | # define NUM_RCU_LVL_0 1 | 64 | # define NUM_RCU_LVL_0 1 |
| 60 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) | 65 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
| 61 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 66 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
| 62 | # define NUM_RCU_LVL_3 NR_CPUS | 67 | # define NUM_RCU_LVL_3 (NR_CPUS) |
| 63 | # define NUM_RCU_LVL_4 0 | 68 | # define NUM_RCU_LVL_4 0 |
| 64 | #elif NR_CPUS <= RCU_FANOUT_FOURTH | 69 | #elif NR_CPUS <= RCU_FANOUT_4 |
| 65 | # define NUM_RCU_LVLS 4 | 70 | # define NUM_RCU_LVLS 4 |
| 66 | # define NUM_RCU_LVL_0 1 | 71 | # define NUM_RCU_LVL_0 1 |
| 67 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) | 72 | # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) |
| 68 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) | 73 | # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) |
| 69 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) | 74 | # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) |
| 70 | # define NUM_RCU_LVL_4 NR_CPUS | 75 | # define NUM_RCU_LVL_4 (NR_CPUS) |
| 71 | #else | 76 | #else |
| 72 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" | 77 | # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" |
| 73 | #endif /* #if (NR_CPUS) <= RCU_FANOUT */ | 78 | #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ |
| 74 | 79 | ||
| 75 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) | 80 | #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) |
| 76 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) | 81 | #define NUM_RCU_NODES (RCU_SUM - NR_CPUS) |
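To make the reworked geometry concrete, here is a worked example under assumed values, NR_CPUS = 4096 and CONFIG_RCU_FANOUT = 32, following the macros above:

```c
/*
 * CONFIG_RCU_FANOUT = 32 > 16, so the leaf level is capped:
 *
 *   RCU_FANOUT_1 = RCU_FANOUT_LEAF = 16
 *   RCU_FANOUT_2 = 16 * 32 = 512
 *   RCU_FANOUT_3 = 512 * 32 = 16384 >= NR_CPUS, so NUM_RCU_LVLS = 3
 *
 *   NUM_RCU_LVL_0 = 1                             (root)
 *   NUM_RCU_LVL_1 = DIV_ROUND_UP(4096, 512) = 8
 *   NUM_RCU_LVL_2 = DIV_ROUND_UP(4096, 16)  = 256 (leaves)
 *   NUM_RCU_LVL_3 = 4096                          (per-CPU rcu_data, not nodes)
 *
 *   NUM_RCU_NODES = (1 + 8 + 256 + 4096) - 4096 = 265 rcu_node structures
 */
```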
| @@ -203,8 +208,8 @@ struct rcu_data { | |||
| 203 | long qlen_last_fqs_check; | 208 | long qlen_last_fqs_check; |
| 204 | /* qlen at last check for QS forcing */ | 209 | /* qlen at last check for QS forcing */ |
| 205 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ | 210 | unsigned long n_cbs_invoked; /* count of RCU cbs invoked. */ |
| 206 | unsigned long n_cbs_orphaned; /* RCU cbs sent to orphanage. */ | 211 | unsigned long n_cbs_orphaned; /* RCU cbs orphaned by dying CPU */ |
| 207 | unsigned long n_cbs_adopted; /* RCU cbs adopted from orphanage. */ | 212 | unsigned long n_cbs_adopted; /* RCU cbs adopted from dying CPU */ |
| 208 | unsigned long n_force_qs_snap; | 213 | unsigned long n_force_qs_snap; |
| 209 | /* did other CPU force QS recently? */ | 214 | /* did other CPU force QS recently? */ |
| 210 | long blimit; /* Upper limit on a processed batch */ | 215 | long blimit; /* Upper limit on a processed batch */ |
| @@ -309,15 +314,7 @@ struct rcu_state { | |||
| 309 | /* End of fields guarded by root rcu_node's lock. */ | 314 | /* End of fields guarded by root rcu_node's lock. */ |
| 310 | 315 | ||
| 311 | raw_spinlock_t onofflock; /* exclude on/offline and */ | 316 | raw_spinlock_t onofflock; /* exclude on/offline and */ |
| 312 | /* starting new GP. Also */ | 317 | /* starting new GP. */ |
| 313 | /* protects the following */ | ||
| 314 | /* orphan_cbs fields. */ | ||
| 315 | struct rcu_head *orphan_cbs_list; /* list of rcu_head structs */ | ||
| 316 | /* orphaned by all CPUs in */ | ||
| 317 | /* a given leaf rcu_node */ | ||
| 318 | /* going offline. */ | ||
| 319 | struct rcu_head **orphan_cbs_tail; /* And tail pointer. */ | ||
| 320 | long orphan_qlen; /* Number of orphaned cbs. */ | ||
| 321 | raw_spinlock_t fqslock; /* Only one task forcing */ | 318 | raw_spinlock_t fqslock; /* Only one task forcing */ |
| 322 | /* quiescent states. */ | 319 | /* quiescent states. */ |
| 323 | unsigned long jiffies_force_qs; /* Time at which to invoke */ | 320 | unsigned long jiffies_force_qs; /* Time at which to invoke */ |
| @@ -390,7 +387,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp); | |||
| 390 | static int rcu_preempt_pending(int cpu); | 387 | static int rcu_preempt_pending(int cpu); |
| 391 | static int rcu_preempt_needs_cpu(int cpu); | 388 | static int rcu_preempt_needs_cpu(int cpu); |
| 392 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); | 389 | static void __cpuinit rcu_preempt_init_percpu_data(int cpu); |
| 393 | static void rcu_preempt_send_cbs_to_orphanage(void); | 390 | static void rcu_preempt_send_cbs_to_online(void); |
| 394 | static void __init __rcu_init_preempt(void); | 391 | static void __init __rcu_init_preempt(void); |
| 395 | static void rcu_needs_cpu_flush(void); | 392 | static void rcu_needs_cpu_flush(void); |
| 396 | 393 | ||
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 71a4147473f9..a3638710dc67 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | */ | 25 | */ |
| 26 | 26 | ||
| 27 | #include <linux/delay.h> | 27 | #include <linux/delay.h> |
| 28 | #include <linux/stop_machine.h> | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * Check the RCU kernel configuration parameters and print informative | 31 | * Check the RCU kernel configuration parameters and print informative |
| @@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
| 773 | } | 774 | } |
| 774 | 775 | ||
| 775 | /* | 776 | /* |
| 776 | * Move preemptable RCU's callbacks to ->orphan_cbs_list. | 777 | * Move preemptable RCU's callbacks from the dying CPU to an online CPU. |
| 777 | */ | 778 | */ |
| 778 | static void rcu_preempt_send_cbs_to_orphanage(void) | 779 | static void rcu_preempt_send_cbs_to_online(void) |
| 779 | { | 780 | { |
| 780 | rcu_send_cbs_to_orphanage(&rcu_preempt_state); | 781 | rcu_send_cbs_to_online(&rcu_preempt_state); |
| 781 | } | 782 | } |
| 782 | 783 | ||
| 783 | /* | 784 | /* |
| @@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu) | |||
| 1001 | /* | 1002 | /* |
| 1002 | * Because there is no preemptable RCU, there are no callbacks to move. | 1003 | * Because there is no preemptable RCU, there are no callbacks to move. |
| 1003 | */ | 1004 | */ |
| 1004 | static void rcu_preempt_send_cbs_to_orphanage(void) | 1005 | static void rcu_preempt_send_cbs_to_online(void) |
| 1005 | { | 1006 | { |
| 1006 | } | 1007 | } |
| 1007 | 1008 | ||
| @@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void) | |||
| 1014 | 1015 | ||
| 1015 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1016 | #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 1016 | 1017 | ||
| 1018 | #ifndef CONFIG_SMP | ||
| 1019 | |||
| 1020 | void synchronize_sched_expedited(void) | ||
| 1021 | { | ||
| 1022 | cond_resched(); | ||
| 1023 | } | ||
| 1024 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 1025 | |||
| 1026 | #else /* #ifndef CONFIG_SMP */ | ||
| 1027 | |||
| 1028 | static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0); | ||
| 1029 | static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0); | ||
| 1030 | |||
| 1031 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
| 1032 | { | ||
| 1033 | /* | ||
| 1034 | * There must be a full memory barrier on each affected CPU | ||
| 1035 | * between the time that try_stop_cpus() is called and the | ||
| 1036 | * time that it returns. | ||
| 1037 | * | ||
| 1038 | * In the current initial implementation of cpu_stop, the | ||
| 1039 | * above condition is already met when the control reaches | ||
| 1040 | * this point and the following smp_mb() is not strictly | ||
| 1041 | * necessary. Do smp_mb() anyway for documentation and | ||
| 1042 | * robustness against future implementation changes. | ||
| 1043 | */ | ||
| 1044 | smp_mb(); /* See above comment block. */ | ||
| 1045 | return 0; | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | /* | ||
| 1049 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | ||
| 1050 | * approach to force grace period to end quickly. This consumes | ||
| 1051 | * significant time on all CPUs, and is thus not recommended for | ||
| 1052 | * any sort of common-case code. | ||
| 1053 | * | ||
| 1054 | * Note that it is illegal to call this function while holding any | ||
| 1055 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
| 1056 | * observe this restriction will result in deadlock. | ||
| 1057 | * | ||
| 1058 | * This implementation can be thought of as an application of ticket | ||
| 1059 | * locking to RCU, with sync_sched_expedited_started and | ||
| 1060 | * sync_sched_expedited_done taking on the roles of the halves | ||
| 1061 | * of the ticket-lock word. Each task atomically increments | ||
| 1062 | * sync_sched_expedited_started upon entry, snapshotting the old value, | ||
| 1063 | * then attempts to stop all the CPUs. If this succeeds, then each | ||
| 1064 | * CPU will have executed a context switch, resulting in an RCU-sched | ||
| 1065 | * grace period. We are then done, so we use atomic_cmpxchg() to | ||
| 1066 | * update sync_sched_expedited_done to match our snapshot -- but | ||
| 1067 | * only if someone else has not already advanced past our snapshot. | ||
| 1068 | * | ||
| 1069 | * On the other hand, if try_stop_cpus() fails, we check the value | ||
| 1070 | * of sync_sched_expedited_done. If it has advanced past our | ||
| 1071 | * initial snapshot, then someone else must have forced a grace period | ||
| 1072 | * some time after we took our snapshot. In this case, our work is | ||
| 1073 | * done for us, and we can simply return. Otherwise, we try again, | ||
| 1074 | * but keep our initial snapshot for purposes of checking for someone | ||
| 1075 | * doing our work for us. | ||
| 1076 | * | ||
| 1077 | * If we fail too many times in a row, we fall back to synchronize_sched(). | ||
| 1078 | */ | ||
| 1079 | void synchronize_sched_expedited(void) | ||
| 1080 | { | ||
| 1081 | int firstsnap, s, snap, trycount = 0; | ||
| 1082 | |||
| 1083 | /* Note that atomic_inc_return() implies full memory barrier. */ | ||
| 1084 | firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started); | ||
| 1085 | get_online_cpus(); | ||
| 1086 | |||
| 1087 | /* | ||
| 1088 | * Each pass through the following loop attempts to force a | ||
| 1089 | * context switch on each CPU. | ||
| 1090 | */ | ||
| 1091 | while (try_stop_cpus(cpu_online_mask, | ||
| 1092 | synchronize_sched_expedited_cpu_stop, | ||
| 1093 | NULL) == -EAGAIN) { | ||
| 1094 | put_online_cpus(); | ||
| 1095 | |||
| 1096 | /* No joy, try again later. Or just synchronize_sched(). */ | ||
| 1097 | if (trycount++ < 10) | ||
| 1098 | udelay(trycount * num_online_cpus()); | ||
| 1099 | else { | ||
| 1100 | synchronize_sched(); | ||
| 1101 | return; | ||
| 1102 | } | ||
| 1103 | |||
| 1104 | /* Check to see if someone else did our work for us. */ | ||
| 1105 | s = atomic_read(&sync_sched_expedited_done); | ||
| 1106 | if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) { | ||
| 1107 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 1108 | return; | ||
| 1109 | } | ||
| 1110 | |||
| 1111 | /* | ||
| 1112 | * Refetching sync_sched_expedited_started allows later | ||
| 1113 | * callers to piggyback on our grace period. We subtract | ||
| 1114 | * 1 to get the same token that the last incrementer got. | ||
| 1115 | * We retry after they started, so our grace period works | ||
| 1116 | * for them, and they started after our first try, so their | ||
| 1117 | * grace period works for us. | ||
| 1118 | */ | ||
| 1119 | get_online_cpus(); | ||
| 1120 | snap = atomic_read(&sync_sched_expedited_started) - 1; | ||
| 1121 | smp_mb(); /* ensure read is before try_stop_cpus(). */ | ||
| 1122 | } | ||
| 1123 | |||
| 1124 | /* | ||
| 1125 | * Everyone up to our most recent fetch is covered by our grace | ||
| 1126 | * period. Update the counter, but only if our work is still | ||
| 1127 | * relevant -- which it won't be if someone who started later | ||
| 1128 | * than we did beat us to the punch. | ||
| 1129 | */ | ||
| 1130 | do { | ||
| 1131 | s = atomic_read(&sync_sched_expedited_done); | ||
| 1132 | if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) { | ||
| 1133 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 1134 | break; | ||
| 1135 | } | ||
| 1136 | } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s); | ||
| 1137 | |||
| 1138 | put_online_cpus(); | ||
| 1139 | } | ||
| 1140 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 1141 | |||
| 1142 | #endif /* #else #ifndef CONFIG_SMP */ | ||
| 1143 | |||
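The ticket analogy above reduces to a few dozen lines. Below is an illustrative user-space rendering with C11 atomics, not kernel code: force_gp() stands in for try_stop_cpus(), the retry-snapshot step is simplified, and UINT_CMP_GE mirrors the kernel macro used in the patch.

```c
#include <limits.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* "a >= b" for free-running unsigned counters, wraparound-safe. */
#define UINT_CMP_GE(a, b) (UINT_MAX / 2 >= (unsigned)(a) - (unsigned)(b))

static atomic_int started, done;
static int tries;

/* Stand-in for try_stop_cpus(): "fails" twice, then succeeds. */
static bool force_gp(void)
{
	return ++tries % 3 == 0;
}

void expedited(void)
{
	int firstsnap, snap, s;

	/* Take a ticket; the atomic RMW implies a full barrier. */
	firstsnap = snap = atomic_fetch_add(&started, 1) + 1;

	while (!force_gp()) {
		/* Did a later caller's grace period already cover us? */
		s = atomic_load(&done);
		if (UINT_CMP_GE(s, firstsnap))
			return;
		/* Retry with a fresh snapshot so our next success also
		 * covers everyone who has started by now. */
		snap = atomic_load(&started);
	}

	/* Publish our grace period unless someone got further already. */
	do {
		s = atomic_load(&done);
		if (UINT_CMP_GE(s, snap))
			break;
	} while (!atomic_compare_exchange_weak(&done, &s, snap));
}

int main(void)
{
	expedited();
	printf("done = %d\n", atomic_load(&done));  /* prints: done = 1 */
	return 0;
}
```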
| 1017 | #if !defined(CONFIG_RCU_FAST_NO_HZ) | 1144 | #if !defined(CONFIG_RCU_FAST_NO_HZ) |
| 1018 | 1145 | ||
| 1019 | /* | 1146 | /* |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d15430b9d122..c8e97853b970 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
| @@ -166,13 +166,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
| 166 | 166 | ||
| 167 | gpnum = rsp->gpnum; | 167 | gpnum = rsp->gpnum; |
| 168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " | 168 | seq_printf(m, "c=%lu g=%lu s=%d jfq=%ld j=%x " |
| 169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld\n", | 169 | "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu\n", |
| 170 | rsp->completed, gpnum, rsp->signaled, | 170 | rsp->completed, gpnum, rsp->signaled, |
| 171 | (long)(rsp->jiffies_force_qs - jiffies), | 171 | (long)(rsp->jiffies_force_qs - jiffies), |
| 172 | (int)(jiffies & 0xffff), | 172 | (int)(jiffies & 0xffff), |
| 173 | rsp->n_force_qs, rsp->n_force_qs_ngp, | 173 | rsp->n_force_qs, rsp->n_force_qs_ngp, |
| 174 | rsp->n_force_qs - rsp->n_force_qs_ngp, | 174 | rsp->n_force_qs - rsp->n_force_qs_ngp, |
| 175 | rsp->n_force_qs_lh, rsp->orphan_qlen); | 175 | rsp->n_force_qs_lh); |
| 176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { | 176 | for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < NUM_RCU_NODES; rnp++) { |
| 177 | if (rnp->level != level) { | 177 | if (rnp->level != level) { |
| 178 | seq_puts(m, "\n"); | 178 | seq_puts(m, "\n"); |
| @@ -300,7 +300,7 @@ static const struct file_operations rcu_pending_fops = { | |||
| 300 | 300 | ||
| 301 | static struct dentry *rcudir; | 301 | static struct dentry *rcudir; |
| 302 | 302 | ||
| 303 | static int __init rcuclassic_trace_init(void) | 303 | static int __init rcutree_trace_init(void) |
| 304 | { | 304 | { |
| 305 | struct dentry *retval; | 305 | struct dentry *retval; |
| 306 | 306 | ||
| @@ -337,14 +337,14 @@ free_out: | |||
| 337 | return 1; | 337 | return 1; |
| 338 | } | 338 | } |
| 339 | 339 | ||
| 340 | static void __exit rcuclassic_trace_cleanup(void) | 340 | static void __exit rcutree_trace_cleanup(void) |
| 341 | { | 341 | { |
| 342 | debugfs_remove_recursive(rcudir); | 342 | debugfs_remove_recursive(rcudir); |
| 343 | } | 343 | } |
| 344 | 344 | ||
| 345 | 345 | ||
| 346 | module_init(rcuclassic_trace_init); | 346 | module_init(rcutree_trace_init); |
| 347 | module_exit(rcuclassic_trace_cleanup); | 347 | module_exit(rcutree_trace_cleanup); |
| 348 | 348 | ||
| 349 | MODULE_AUTHOR("Paul E. McKenney"); | 349 | MODULE_AUTHOR("Paul E. McKenney"); |
| 350 | MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); | 350 | MODULE_DESCRIPTION("Read-Copy Update tracing for hierarchical implementation"); |
diff --git a/kernel/sched.c b/kernel/sched.c index 297d1a0eedb0..e6f8f1254319 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
| @@ -9534,72 +9534,3 @@ struct cgroup_subsys cpuacct_subsys = { | |||
| 9534 | }; | 9534 | }; |
| 9535 | #endif /* CONFIG_CGROUP_CPUACCT */ | 9535 | #endif /* CONFIG_CGROUP_CPUACCT */ |
| 9536 | 9536 | ||
| 9537 | #ifndef CONFIG_SMP | ||
| 9538 | |||
| 9539 | void synchronize_sched_expedited(void) | ||
| 9540 | { | ||
| 9541 | barrier(); | ||
| 9542 | } | ||
| 9543 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 9544 | |||
| 9545 | #else /* #ifndef CONFIG_SMP */ | ||
| 9546 | |||
| 9547 | static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); | ||
| 9548 | |||
| 9549 | static int synchronize_sched_expedited_cpu_stop(void *data) | ||
| 9550 | { | ||
| 9551 | /* | ||
| 9552 | * There must be a full memory barrier on each affected CPU | ||
| 9553 | * between the time that try_stop_cpus() is called and the | ||
| 9554 | * time that it returns. | ||
| 9555 | * | ||
| 9556 | * In the current initial implementation of cpu_stop, the | ||
| 9557 | * above condition is already met when the control reaches | ||
| 9558 | * this point and the following smp_mb() is not strictly | ||
| 9559 | * necessary. Do smp_mb() anyway for documentation and | ||
| 9560 | * robustness against future implementation changes. | ||
| 9561 | */ | ||
| 9562 | smp_mb(); /* See above comment block. */ | ||
| 9563 | return 0; | ||
| 9564 | } | ||
| 9565 | |||
| 9566 | /* | ||
| 9567 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | ||
| 9568 | * approach to force grace period to end quickly. This consumes | ||
| 9569 | * significant time on all CPUs, and is thus not recommended for | ||
| 9570 | * any sort of common-case code. | ||
| 9571 | * | ||
| 9572 | * Note that it is illegal to call this function while holding any | ||
| 9573 | * lock that is acquired by a CPU-hotplug notifier. Failing to | ||
| 9574 | * observe this restriction will result in deadlock. | ||
| 9575 | */ | ||
| 9576 | void synchronize_sched_expedited(void) | ||
| 9577 | { | ||
| 9578 | int snap, trycount = 0; | ||
| 9579 | |||
| 9580 | smp_mb(); /* ensure prior mod happens before capturing snap. */ | ||
| 9581 | snap = atomic_read(&synchronize_sched_expedited_count) + 1; | ||
| 9582 | get_online_cpus(); | ||
| 9583 | while (try_stop_cpus(cpu_online_mask, | ||
| 9584 | synchronize_sched_expedited_cpu_stop, | ||
| 9585 | NULL) == -EAGAIN) { | ||
| 9586 | put_online_cpus(); | ||
| 9587 | if (trycount++ < 10) | ||
| 9588 | udelay(trycount * num_online_cpus()); | ||
| 9589 | else { | ||
| 9590 | synchronize_sched(); | ||
| 9591 | return; | ||
| 9592 | } | ||
| 9593 | if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { | ||
| 9594 | smp_mb(); /* ensure test happens before caller kfree */ | ||
| 9595 | return; | ||
| 9596 | } | ||
| 9597 | get_online_cpus(); | ||
| 9598 | } | ||
| 9599 | atomic_inc(&synchronize_sched_expedited_count); | ||
| 9600 | smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ | ||
| 9601 | put_online_cpus(); | ||
| 9602 | } | ||
| 9603 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | ||
| 9604 | |||
| 9605 | #endif /* #else #ifndef CONFIG_SMP */ | ||
diff --git a/kernel/srcu.c b/kernel/srcu.c index c71e07500536..98d8c1e80edb 100644 --- a/kernel/srcu.c +++ b/kernel/srcu.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/rcupdate.h> | 31 | #include <linux/rcupdate.h> |
| 32 | #include <linux/sched.h> | 32 | #include <linux/sched.h> |
| 33 | #include <linux/smp.h> | 33 | #include <linux/smp.h> |
| 34 | #include <linux/delay.h> | ||
| 34 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
| 35 | 36 | ||
| 36 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 37 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
| @@ -203,9 +204,14 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
| 203 | * all srcu_read_lock() calls using the old counters have completed. | 204 | * all srcu_read_lock() calls using the old counters have completed. |
| 204 | * Their corresponding critical sections might well be still | 205 | * Their corresponding critical sections might well be still |
| 205 | * executing, but the srcu_read_lock() primitives themselves | 206 | * executing, but the srcu_read_lock() primitives themselves |
| 206 | * will have finished executing. | 207 | * will have finished executing. We initially give readers |
| 208 | * an arbitrary CONFIG_SRCU_SYNCHRONIZE_DELAY microseconds | ||
| 209 | * (10 by default) to get out of their SRCU read-side critical | ||
| 210 | * sections, then loop waiting 1/HZ seconds per iteration. | ||
| 207 | */ | 211 | */ |
| 208 | 212 | ||
| 213 | if (srcu_readers_active_idx(sp, idx)) | ||
| 214 | udelay(CONFIG_SRCU_SYNCHRONIZE_DELAY); | ||
| 209 | while (srcu_readers_active_idx(sp, idx)) | 215 | while (srcu_readers_active_idx(sp, idx)) |
| 210 | schedule_timeout_interruptible(1); | 216 | schedule_timeout_interruptible(1); |
| 211 | 217 | ||
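The added udelay() turns __synchronize_srcu() into a two-phase wait: one short busy delay for the common case where readers exit almost immediately, then jiffy-scale polling for stragglers. A hedged user-space sketch; readers_active(), the fake counter, and both constants are stand-ins, not SRCU API:

```c
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define SYNC_DELAY_US 10          /* stands in for CONFIG_SRCU_SYNCHRONIZE_DELAY */
#define POLL_US (1000000 / 100)   /* roughly one jiffy at HZ=100 */

static int active = 2;            /* fake reader count; drains as we poll */

static bool readers_active(void)  /* stands in for srcu_readers_active_idx() */
{
	return active-- > 0;
}

static void wait_for_readers(void)
{
	/* Phase 1: one short delay covers readers that exit quickly. */
	if (readers_active())
		usleep(SYNC_DELAY_US);
	/* Phase 2: cheap periodic polling for the stragglers. */
	while (readers_active())
		usleep(POLL_US);
}

int main(void)
{
	wait_for_readers();
	puts("all readers done");
	return 0;
}
```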
