-rw-r--r--  kernel/rcutorture.c      |  34
-rw-r--r--  kernel/rcutree.c         |  10
-rw-r--r--  kernel/rcutree.h         |  35
-rw-r--r--  kernel/rcutree_plugin.h  | 198
-rw-r--r--  kernel/rcutree_trace.c   |  10
5 files changed, 260 insertions(+), 27 deletions(-)
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 3dd0ca23e191..a621a67ef4e3 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -327,6 +327,11 @@ rcu_torture_cb(struct rcu_head *p)
 		cur_ops->deferred_free(rp);
 }
 
+static int rcu_no_completed(void)
+{
+	return 0;
+}
+
 static void rcu_torture_deferred_free(struct rcu_torture *p)
 {
 	call_rcu(&p->rtort_rcu, rcu_torture_cb);
@@ -388,6 +393,21 @@ static struct rcu_torture_ops rcu_sync_ops = {
 	.name = "rcu_sync"
 };
 
+static struct rcu_torture_ops rcu_expedited_ops = {
+	.init = rcu_sync_torture_init,
+	.cleanup = NULL,
+	.readlock = rcu_torture_read_lock,
+	.read_delay = rcu_read_delay, /* just reuse rcu's version. */
+	.readunlock = rcu_torture_read_unlock,
+	.completed = rcu_no_completed,
+	.deferred_free = rcu_sync_torture_deferred_free,
+	.sync = synchronize_rcu_expedited,
+	.cb_barrier = NULL,
+	.stats = NULL,
+	.irq_capable = 1,
+	.name = "rcu_expedited"
+};
+
 /*
  * Definitions for rcu_bh torture testing.
  */
@@ -581,11 +601,6 @@ static void sched_torture_read_unlock(int idx)
 	preempt_enable();
 }
 
-static int sched_torture_completed(void)
-{
-	return 0;
-}
-
 static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
 {
 	call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
@@ -602,7 +617,7 @@ static struct rcu_torture_ops sched_ops = {
 	.readlock = sched_torture_read_lock,
 	.read_delay = rcu_read_delay, /* just reuse rcu's version. */
 	.readunlock = sched_torture_read_unlock,
-	.completed = sched_torture_completed,
+	.completed = rcu_no_completed,
 	.deferred_free = rcu_sched_torture_deferred_free,
 	.sync = sched_torture_synchronize,
 	.cb_barrier = rcu_barrier_sched,
@@ -617,7 +632,7 @@ static struct rcu_torture_ops sched_sync_ops = {
 	.readlock = sched_torture_read_lock,
 	.read_delay = rcu_read_delay, /* just reuse rcu's version. */
 	.readunlock = sched_torture_read_unlock,
-	.completed = sched_torture_completed,
+	.completed = rcu_no_completed,
 	.deferred_free = rcu_sync_torture_deferred_free,
 	.sync = sched_torture_synchronize,
 	.cb_barrier = NULL,
@@ -631,7 +646,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
 	.readlock = sched_torture_read_lock,
 	.read_delay = rcu_read_delay, /* just reuse rcu's version. */
 	.readunlock = sched_torture_read_unlock,
-	.completed = sched_torture_completed,
+	.completed = rcu_no_completed,
 	.deferred_free = rcu_sync_torture_deferred_free,
 	.sync = synchronize_sched_expedited,
 	.cb_barrier = NULL,
@@ -1116,7 +1131,8 @@ rcu_torture_init(void)
 	int cpu;
 	int firsterr = 0;
 	static struct rcu_torture_ops *torture_ops[] =
-		{ &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
+		{ &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
+		  &rcu_bh_ops, &rcu_bh_sync_ops,
 		  &srcu_ops, &srcu_expedited_ops,
 		  &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
 
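The rcutorture.c changes above add an "rcu_expedited" torture type whose ->sync method is synchronize_rcu_expedited(), and collapse the identical do-nothing ->completed stubs into a single rcu_no_completed(). As a rough illustration of how a torture_type string is matched against such an ops table, here is a standalone userspace sketch; everything suffixed _sk, plus pick_ops() and the fake_sync* stand-ins, is invented for the sketch and is not kernel code.

#include <stdio.h>
#include <string.h>

struct rcu_torture_ops_sk {
	void (*sync)(void);
	const char *name;
};

static void fake_sync(void)           { puts("synchronize_rcu() stand-in"); }
static void fake_sync_expedited(void) { puts("synchronize_rcu_expedited() stand-in"); }

static struct rcu_torture_ops_sk rcu_sync_ops_sk      = { fake_sync,           "rcu_sync" };
static struct rcu_torture_ops_sk rcu_expedited_ops_sk = { fake_sync_expedited, "rcu_expedited" };

static struct rcu_torture_ops_sk *torture_ops_sk[] = {
	&rcu_sync_ops_sk, &rcu_expedited_ops_sk,
};

/* Walk the table and return the ops whose ->name matches torture_type. */
static struct rcu_torture_ops_sk *pick_ops(const char *torture_type)
{
	size_t i;

	for (i = 0; i < sizeof(torture_ops_sk) / sizeof(torture_ops_sk[0]); i++)
		if (strcmp(torture_type, torture_ops_sk[i]->name) == 0)
			return torture_ops_sk[i];
	return NULL;	/* unknown type: initialization would fail here */
}

int main(void)
{
	struct rcu_torture_ops_sk *ops = pick_ops("rcu_expedited");

	if (ops)
		ops->sync();	/* torture writers go through cur_ops->sync() */
	return 0;
}

In the kernel itself the choice is presumably made through rcutorture's existing torture_type module parameter (for example torture_type=rcu_expedited), which is what makes the new entry in torture_ops[] reachable.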
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d47e03e5792a..53ae9598f798 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -948,7 +948,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
 	unsigned long mask;
-	int need_quiet = 0;
+	int need_report = 0;
 	struct rcu_data *rdp = rsp->rda[cpu];
 	struct rcu_node *rnp;
 
@@ -967,7 +967,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 			break;
 		}
 		if (rnp == rdp->mynode)
-			need_quiet = rcu_preempt_offline_tasks(rsp, rnp, rdp);
+			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
 		else
 			spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		mask = rnp->grpmask;
@@ -982,10 +982,12 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	 */
 	spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
 	rnp = rdp->mynode;
-	if (need_quiet)
+	if (need_report & RCU_OFL_TASKS_NORM_GP)
 		rcu_report_unblock_qs_rnp(rnp, flags);
 	else
 		spin_unlock_irqrestore(&rnp->lock, flags);
+	if (need_report & RCU_OFL_TASKS_EXP_GP)
+		rcu_report_exp_rnp(rsp, rnp);
 
 	rcu_adopt_orphan_cbs(rsp);
 }
@@ -1843,6 +1845,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
 			rnp->level = i;
 			INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
 			INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
+			INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
+			INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
 		}
 	}
 }
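In the rcutree.c hunk above, rcu_preempt_offline_tasks() now returns a bit mask instead of a boolean, so the CPU-offline path can report a quiescent state for the normal grace period and for the expedited grace period independently. A minimal userspace sketch of that convention follows; the flag values mirror RCU_OFL_TASKS_NORM_GP and RCU_OFL_TASKS_EXP_GP, while the report functions are invented stand-ins.

#include <stdio.h>

#define OFL_TASKS_NORM_GP 0x1	/* tasks blocking a normal GP were moved to root */
#define OFL_TASKS_EXP_GP  0x2	/* tasks blocking an expedited GP were moved to root */

static void report_unblock_qs(void) { puts("report normal-GP quiescent state"); }
static void report_exp(void)        { puts("report expedited-GP quiescent state"); }

/* Mirrors the need_report handling at the end of __rcu_offline_cpu(). */
static void offline_report(int need_report)
{
	if (need_report & OFL_TASKS_NORM_GP)
		report_unblock_qs();
	if (need_report & OFL_TASKS_EXP_GP)
		report_exp();
}

int main(void)
{
	offline_report(OFL_TASKS_NORM_GP | OFL_TASKS_EXP_GP);	/* both reports fire */
	offline_report(0);					/* nothing to report */
	return 0;
}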
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index df2e0b694744..d2a0046f63b2 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -104,8 +104,12 @@ struct rcu_node {
 				/* an rcu_data structure, otherwise, each */
 				/* bit corresponds to a child rcu_node */
 				/* structure. */
+	unsigned long expmask;	/* Groups that have ->blocked_tasks[] */
+				/* elements that need to drain to allow the */
+				/* current expedited grace period to */
+				/* complete (only for TREE_PREEMPT_RCU). */
 	unsigned long qsmaskinit;
-				/* Per-GP initialization for qsmask. */
+				/* Per-GP initial value for qsmask & expmask. */
 	unsigned long grpmask;	/* Mask to apply to parent qsmask. */
 				/* Only one bit will be set in this mask. */
 	int	grplo;		/* lowest-numbered CPU or group here. */
@@ -113,7 +117,7 @@ struct rcu_node {
 	u8	grpnum;		/* CPU/group number for next level up. */
 	u8	level;		/* root is at level 0. */
 	struct rcu_node *parent;
-	struct list_head blocked_tasks[2];
+	struct list_head blocked_tasks[4];
 				/* Tasks blocked in RCU read-side critsect. */
 				/* Grace period number (->gpnum) x blocked */
 				/* by tasks on the (x & 0x1) element of the */
@@ -128,6 +132,21 @@ struct rcu_node {
 	for ((rnp) = &(rsp)->node[0]; \
 	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
 
+/*
+ * Do a breadth-first scan of the non-leaf rcu_node structures for the
+ * specified rcu_state structure. Note that if there is a singleton
+ * rcu_node tree with but one rcu_node structure, this loop is a no-op.
+ */
+#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
+	for ((rnp) = &(rsp)->node[0]; \
+	     (rnp) < (rsp)->level[NUM_RCU_LVLS - 1]; (rnp)++)
+
+/*
+ * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
+ * structure. Note that if there is a singleton rcu_node tree with but
+ * one rcu_node structure, this loop -will- visit the rcu_node structure.
+ * It is still a leaf node, even if it is also the root node.
+ */
 #define rcu_for_each_leaf_node(rsp, rnp) \
 	for ((rnp) = (rsp)->level[NUM_RCU_LVLS - 1]; \
 	     (rnp) < &(rsp)->node[NUM_RCU_NODES]; (rnp)++)
@@ -261,7 +280,7 @@ struct rcu_state {
 	long	gpnum;				/* Current gp number. */
 	long	completed;			/* # of last completed gp. */
 
-	/* End of fields guarded by root rcu_node's lock. */
+	/* End of fields guarded by root rcu_node's lock. */
 
 	spinlock_t onofflock;			/* exclude on/offline and */
 						/* starting new GP. Also */
@@ -293,6 +312,13 @@ struct rcu_state {
 #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 };
 
+/* Return values for rcu_preempt_offline_tasks(). */
+
+#define RCU_OFL_TASKS_NORM_GP	0x1	/* Tasks blocking normal */
+					/* GP were moved to root. */
+#define RCU_OFL_TASKS_EXP_GP	0x2	/* Tasks blocking expedited */
+					/* GP were moved to root. */
+
 #ifdef RCU_TREE_NONCORE
 
 /*
@@ -333,6 +359,9 @@ static void rcu_preempt_offline_cpu(int cpu);
 static void rcu_preempt_check_callbacks(int cpu);
 static void rcu_preempt_process_callbacks(void);
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
+#endif /* #if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_TREE_PREEMPT_RCU) */
 static int rcu_preempt_pending(int cpu);
 static int rcu_preempt_needs_cpu(int cpu);
 static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
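The header changes above grow ->blocked_tasks[] from two lists to four: elements 0 and 1 hold readers blocking a normal grace period (indexed by gpnum & 0x1), and elements 2 and 3 receive those same readers when an expedited grace period snapshots them, after which they block both kinds of grace period. A small userspace sketch of that bookkeeping, with counters standing in for the list_splice_init() calls and all names invented for the sketch:

#include <stdbool.h>
#include <stdio.h>

struct node_sk {
	int blocked[4];		/* number of blocked readers per list */
	long gpnum;
};

/* Does anything still block the normal GP? (cf. rcu_preempted_readers()) */
static bool readers_norm(struct node_sk *np)
{
	int phase = np->gpnum & 0x1;

	return np->blocked[phase] || np->blocked[phase + 2];
}

/* Does anything still block the expedited GP? (cf. rcu_preempted_readers_exp()) */
static bool readers_exp(struct node_sk *np)
{
	return np->blocked[2] || np->blocked[3];
}

/* Snapshot for an expedited GP: lists 0/1 are spliced onto lists 2/3. */
static void start_expedited(struct node_sk *np)
{
	np->blocked[2] += np->blocked[0];
	np->blocked[3] += np->blocked[1];
	np->blocked[0] = np->blocked[1] = 0;
}

int main(void)
{
	struct node_sk n = { { 1, 0, 0, 0 }, 8 };	/* one reader blocks normal GP 8 */

	printf("norm=%d exp=%d\n", readers_norm(&n), readers_exp(&n));	/* norm=1 exp=0 */
	start_expedited(&n);
	printf("norm=%d exp=%d\n", readers_norm(&n), readers_exp(&n));	/* norm=1 exp=1 */
	return 0;
}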
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c9f0c975c003..37fbccdf41d5 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -24,12 +24,15 @@
  *	   Paul E. McKenney <paulmck@linux.vnet.ibm.com>
  */
 
+#include <linux/delay.h>
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 
+static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+
 /*
  * Tell them what RCU they are running.
  */
@@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock);
  */
 static int rcu_preempted_readers(struct rcu_node *rnp)
 {
-	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+	int phase = rnp->gpnum & 0x1;
+
+	return !list_empty(&rnp->blocked_tasks[phase]) ||
+	       !list_empty(&rnp->blocked_tasks[phase + 2]);
 }
 
 /*
@@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
 static void rcu_read_unlock_special(struct task_struct *t)
 {
 	int empty;
+	int empty_exp;
 	unsigned long flags;
 	struct rcu_node *rnp;
 	int special;
@@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		}
 		empty = !rcu_preempted_readers(rnp);
+		empty_exp = !rcu_preempted_readers_exp(rnp);
+		smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
 		list_del_init(&t->rcu_node_entry);
 		t->rcu_blocked_node = NULL;
 
@@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t)
 			spin_unlock_irqrestore(&rnp->lock, flags);
 		else
 			rcu_report_unblock_qs_rnp(rnp, flags);
+
+		/*
+		 * If this was the last task on the expedited lists,
+		 * then we need to report up the rcu_node hierarchy.
+		 */
+		if (!empty_exp && !rcu_preempted_readers_exp(rnp))
+			rcu_report_exp_rnp(&rcu_preempt_state, rnp);
 	} else {
 		local_irq_restore(flags);
 	}
@@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	int i;
 	struct list_head *lp;
 	struct list_head *lp_root;
-	int retval;
+	int retval = 0;
 	struct rcu_node *rnp_root = rcu_get_root(rsp);
 	struct task_struct *tp;
 
@@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	}
 	WARN_ON_ONCE(rnp != rdp->mynode &&
 		     (!list_empty(&rnp->blocked_tasks[0]) ||
-		      !list_empty(&rnp->blocked_tasks[1])));
+		      !list_empty(&rnp->blocked_tasks[1]) ||
+		      !list_empty(&rnp->blocked_tasks[2]) ||
+		      !list_empty(&rnp->blocked_tasks[3])));
 
 	/*
 	 * Move tasks up to root rcu_node. Rely on the fact that the
@@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
 	 * rcu_nodes in terms of gp_num value. This fact allows us to
 	 * move the blocked_tasks[] array directly, element by element.
 	 */
-	retval = rcu_preempted_readers(rnp);
-	for (i = 0; i < 2; i++) {
+	if (rcu_preempted_readers(rnp))
+		retval |= RCU_OFL_TASKS_NORM_GP;
+	if (rcu_preempted_readers_exp(rnp))
+		retval |= RCU_OFL_TASKS_EXP_GP;
+	for (i = 0; i < 4; i++) {
 		lp = &rnp->blocked_tasks[i];
 		lp_root = &rnp_root->blocked_tasks[i];
 		while (!list_empty(lp)) {
@@ -449,14 +470,159 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(struct rcu_node *rnp)
+{
+	return !list_empty(&rnp->blocked_tasks[2]) ||
+	       !list_empty(&rnp->blocked_tasks[3]);
+}
+
+/*
+ * return non-zero if there is no RCU expedited grace period in progress
+ * for the specified rcu_node structure, in other words, if all CPUs and
+ * tasks covered by the specified rcu_node structure have done their bit
+ * for the current expedited grace period. Works only for preemptible
+ * RCU -- other RCU implementation use other means.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
+{
+	return !rcu_preempted_readers_exp(rnp) &&
+	       ACCESS_ONCE(rnp->expmask) == 0;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period. This event is reported either to the rcu_node structure on
+ * which the task was queued or to one of that rcu_node structure's ancestors,
+ * recursively up the tree. (Calm down, calm down, we do the recursion
+ * iteratively!)
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	unsigned long flags;
+	unsigned long mask;
+
+	spin_lock_irqsave(&rnp->lock, flags);
+	for (;;) {
+		if (!sync_rcu_preempt_exp_done(rnp))
+			break;
+		if (rnp->parent == NULL) {
+			wake_up(&sync_rcu_preempt_exp_wq);
+			break;
+		}
+		mask = rnp->grpmask;
+		spin_unlock(&rnp->lock); /* irqs remain disabled */
+		rnp = rnp->parent;
+		spin_lock(&rnp->lock); /* irqs already disabled */
+		rnp->expmask &= ~mask;
+	}
+	spin_unlock_irqrestore(&rnp->lock, flags);
+}
+
+/*
+ * Snapshot the tasks blocking the newly started preemptible-RCU expedited
+ * grace period for the specified rcu_node structure. If there are no such
+ * tasks, report it up the rcu_node hierarchy.
+ *
+ * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock.
+ */
+static void
+sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	int must_wait;
+
+	spin_lock(&rnp->lock); /* irqs already disabled */
+	list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]);
+	list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]);
+	must_wait = rcu_preempted_readers_exp(rnp);
+	spin_unlock(&rnp->lock); /* irqs remain disabled */
+	if (!must_wait)
+		rcu_report_exp_rnp(rsp, rnp);
+}
+
 /*
- * Wait for an rcu-preempt grace period.  We are supposed to expedite the
- * grace period, but this is the crude slow compatability hack, so just
- * invoke synchronize_rcu().
+ * Wait for an rcu-preempt grace period, but expedite it. The basic idea
+ * is to invoke synchronize_sched_expedited() to push all the tasks to
+ * the ->blocked_tasks[] lists, move all entries from the first set of
+ * ->blocked_tasks[] lists to the second set, and finally wait for this
+ * second set to drain.
  */
 void synchronize_rcu_expedited(void)
 {
-	synchronize_rcu();
+	unsigned long flags;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp = &rcu_preempt_state;
+	long snap;
+	int trycount = 0;
+
+	smp_mb(); /* Caller's modifications seen first by other CPUs. */
+	snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1;
+	smp_mb(); /* Above access cannot bleed into critical section. */
+
+	/*
+	 * Acquire lock, falling back to synchronize_rcu() if too many
+	 * lock-acquisition failures. Of course, if someone does the
+	 * expedited grace period for us, just leave.
+	 */
+	while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) {
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_rcu();
+			return;
+		}
+		if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+			goto mb_ret; /* Others did our work for us. */
+	}
+	if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0)
+		goto unlock_mb_ret; /* Others did our work for us. */
+
+	/* force all RCU readers onto blocked_tasks[]. */
+	synchronize_sched_expedited();
+
+	spin_lock_irqsave(&rsp->onofflock, flags);
+
+	/* Initialize ->expmask for all non-leaf rcu_node structures. */
+	rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
+		spin_lock(&rnp->lock); /* irqs already disabled. */
+		rnp->expmask = rnp->qsmaskinit;
+		spin_unlock(&rnp->lock); /* irqs remain disabled. */
+	}
+
+	/* Snapshot current state of ->blocked_tasks[] lists. */
+	rcu_for_each_leaf_node(rsp, rnp)
+		sync_rcu_preempt_exp_init(rsp, rnp);
+	if (NUM_RCU_NODES > 1)
+		sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp));
+
+	spin_unlock_irqrestore(&rsp->onofflock, flags);
+
+	/* Wait for snapshotted ->blocked_tasks[] lists to drain. */
+	rnp = rcu_get_root(rsp);
+	wait_event(sync_rcu_preempt_exp_wq,
+		   sync_rcu_preempt_exp_done(rnp));
+
+	/* Clean up and exit. */
+	smp_mb(); /* ensure expedited GP seen before counter increment. */
+	ACCESS_ONCE(sync_rcu_preempt_exp_count)++;
+unlock_mb_ret:
+	mutex_unlock(&sync_rcu_preempt_exp_mutex);
+mb_ret:
+	smp_mb(); /* ensure subsequent action seen after grace period. */
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
@@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Because preemptable RCU does not exist, there is never any need to
+ * report on tasks preempted in RCU read-side critical sections during
+ * expedited RCU grace periods.
+ */
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	return;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 /*
  * Because preemptable RCU does not exist, it never has any work to do.
  */
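synchronize_rcu_expedited() above pairs a mutex_trylock() loop with a snapshot of sync_rcu_preempt_exp_count: if the counter moves far enough past the snapshot while we wait, some other caller's expedited grace period must fully cover ours, so we return without doing any work. Below is a loose userspace sketch of just that snapshot-and-trylock idiom, using pthreads and GCC atomic builtins in place of the kernel's mutex, ACCESS_ONCE() and memory barriers; it illustrates the pattern only and is not the kernel implementation.

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static long exp_count;				/* bumped once per completed expedited GP */
static pthread_mutex_t exp_mutex = PTHREAD_MUTEX_INITIALIZER;

static void do_expedited_gp(void)		/* stand-in for the real machinery */
{
	puts("running an expedited grace period");
}

static void synchronize_expedited_sketch(void)
{
	/* Require the counter to advance by two past its pre-call value, so
	 * that at least one full expedited GP began after our snapshot. */
	long snap = __atomic_load_n(&exp_count, __ATOMIC_SEQ_CST) + 1;

	while (pthread_mutex_trylock(&exp_mutex) != 0) {
		sched_yield();		/* the kernel udelay()s with backoff instead */
		if (__atomic_load_n(&exp_count, __ATOMIC_SEQ_CST) - snap > 0)
			return;		/* others did our work for us */
	}
	if (__atomic_load_n(&exp_count, __ATOMIC_SEQ_CST) - snap > 0) {
		pthread_mutex_unlock(&exp_mutex);
		return;			/* re-checked now that we hold the lock */
	}
	do_expedited_gp();
	__atomic_fetch_add(&exp_count, 1, __ATOMIC_SEQ_CST);
	pthread_mutex_unlock(&exp_mutex);
}

int main(void)
{
	synchronize_expedited_sketch();
	synchronize_expedited_sketch();
	return 0;
}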
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 1984cdc51e9a..9d2c88423b31 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -157,6 +157,7 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 {
 	long gpnum;
 	int level = 0;
+	int phase;
 	struct rcu_node *rnp;
 
 	gpnum = rsp->gpnum;
@@ -173,10 +174,13 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
 			seq_puts(m, "\n");
 			level = rnp->level;
 		}
-		seq_printf(m, "%lx/%lx %c>%c %d:%d ^%d ",
+		phase = gpnum & 0x1;
+		seq_printf(m, "%lx/%lx %c%c>%c%c %d:%d ^%d ",
 			   rnp->qsmask, rnp->qsmaskinit,
-			   "T."[list_empty(&rnp->blocked_tasks[gpnum & 1])],
-			   "T."[list_empty(&rnp->blocked_tasks[!(gpnum & 1)])],
+			   "T."[list_empty(&rnp->blocked_tasks[phase])],
+			   "E."[list_empty(&rnp->blocked_tasks[phase + 2])],
+			   "T."[list_empty(&rnp->blocked_tasks[!phase])],
+			   "E."[list_empty(&rnp->blocked_tasks[!phase + 2])],
 			   rnp->grplo, rnp->grphi, rnp->grpnum);
 	}
 	seq_puts(m, "\n");
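The rcutree_trace.c hunk widens each per-node column from %c>%c to %c%c>%c%c, pairing the existing "T" (tasks blocking the normal grace period) with a new "E" (tasks blocking the expedited grace period) for both the current and the next phase. The column characters come from the C idiom of indexing a string literal, which this small standalone program demonstrates:

#include <stdio.h>

int main(void)
{
	int list_is_empty;

	/* "T." is an array, so "T."[0] == 'T' and "T."[1] == '.': a letter is
	 * printed when the corresponding ->blocked_tasks[] list is non-empty,
	 * and '.' when it is empty ("E." works the same way). */
	for (list_is_empty = 0; list_is_empty <= 1; list_is_empty++)
		printf("blocked list %s -> column '%c'\n",
		       list_is_empty ? "empty" : "non-empty",
		       "T."[list_is_empty]);
	return 0;
}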
