diff options
Diffstat (limited to 'kernel/rcutree_plugin.h')
| -rw-r--r-- | kernel/rcutree_plugin.h | 198 |
1 files changed, 189 insertions, 9 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index c9f0c975c003..37fbccdf41d5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
| @@ -24,12 +24,15 @@ | |||
| 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 24 | * Paul E. McKenney <paulmck@linux.vnet.ibm.com> |
| 25 | */ | 25 | */ |
| 26 | 26 | ||
| 27 | #include <linux/delay.h> | ||
| 27 | 28 | ||
| 28 | #ifdef CONFIG_TREE_PREEMPT_RCU | 29 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 29 | 30 | ||
| 30 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); | 31 | struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); |
| 31 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); | 32 | DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); |
| 32 | 33 | ||
| 34 | static int rcu_preempted_readers_exp(struct rcu_node *rnp); | ||
| 35 | |||
| 33 | /* | 36 | /* |
| 34 | * Tell them what RCU they are running. | 37 | * Tell them what RCU they are running. |
| 35 | */ | 38 | */ |
| @@ -157,7 +160,10 @@ EXPORT_SYMBOL_GPL(__rcu_read_lock); | |||
| 157 | */ | 160 | */ |
| 158 | static int rcu_preempted_readers(struct rcu_node *rnp) | 161 | static int rcu_preempted_readers(struct rcu_node *rnp) |
| 159 | { | 162 | { |
| 160 | return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]); | 163 | int phase = rnp->gpnum & 0x1; |
| 164 | |||
| 165 | return !list_empty(&rnp->blocked_tasks[phase]) || | ||
| 166 | !list_empty(&rnp->blocked_tasks[phase + 2]); | ||
| 161 | } | 167 | } |
| 162 | 168 | ||
| 163 | /* | 169 | /* |
| @@ -204,6 +210,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) | |||
| 204 | static void rcu_read_unlock_special(struct task_struct *t) | 210 | static void rcu_read_unlock_special(struct task_struct *t) |
| 205 | { | 211 | { |
| 206 | int empty; | 212 | int empty; |
| 213 | int empty_exp; | ||
| 207 | unsigned long flags; | 214 | unsigned long flags; |
| 208 | struct rcu_node *rnp; | 215 | struct rcu_node *rnp; |
| 209 | int special; | 216 | int special; |
| @@ -247,6 +254,8 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 247 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 254 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
| 248 | } | 255 | } |
| 249 | empty = !rcu_preempted_readers(rnp); | 256 | empty = !rcu_preempted_readers(rnp); |
| 257 | empty_exp = !rcu_preempted_readers_exp(rnp); | ||
| 258 | smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ | ||
| 250 | list_del_init(&t->rcu_node_entry); | 259 | list_del_init(&t->rcu_node_entry); |
| 251 | t->rcu_blocked_node = NULL; | 260 | t->rcu_blocked_node = NULL; |
| 252 | 261 | ||
| @@ -259,6 +268,13 @@ static void rcu_read_unlock_special(struct task_struct *t) | |||
| 259 | spin_unlock_irqrestore(&rnp->lock, flags); | 268 | spin_unlock_irqrestore(&rnp->lock, flags); |
| 260 | else | 269 | else |
| 261 | rcu_report_unblock_qs_rnp(rnp, flags); | 270 | rcu_report_unblock_qs_rnp(rnp, flags); |
| 271 | |||
| 272 | /* | ||
| 273 | * If this was the last task on the expedited lists, | ||
| 274 | * then we need to report up the rcu_node hierarchy. | ||
| 275 | */ | ||
| 276 | if (!empty_exp && !rcu_preempted_readers_exp(rnp)) | ||
| 277 | rcu_report_exp_rnp(&rcu_preempt_state, rnp); | ||
| 262 | } else { | 278 | } else { |
| 263 | local_irq_restore(flags); | 279 | local_irq_restore(flags); |
| 264 | } | 280 | } |
| @@ -343,7 +359,7 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 343 | int i; | 359 | int i; |
| 344 | struct list_head *lp; | 360 | struct list_head *lp; |
| 345 | struct list_head *lp_root; | 361 | struct list_head *lp_root; |
| 346 | int retval; | 362 | int retval = 0; |
| 347 | struct rcu_node *rnp_root = rcu_get_root(rsp); | 363 | struct rcu_node *rnp_root = rcu_get_root(rsp); |
| 348 | struct task_struct *tp; | 364 | struct task_struct *tp; |
| 349 | 365 | ||
| @@ -353,7 +369,9 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 353 | } | 369 | } |
| 354 | WARN_ON_ONCE(rnp != rdp->mynode && | 370 | WARN_ON_ONCE(rnp != rdp->mynode && |
| 355 | (!list_empty(&rnp->blocked_tasks[0]) || | 371 | (!list_empty(&rnp->blocked_tasks[0]) || |
| 356 | !list_empty(&rnp->blocked_tasks[1]))); | 372 | !list_empty(&rnp->blocked_tasks[1]) || |
| 373 | !list_empty(&rnp->blocked_tasks[2]) || | ||
| 374 | !list_empty(&rnp->blocked_tasks[3]))); | ||
| 357 | 375 | ||
| 358 | /* | 376 | /* |
| 359 | * Move tasks up to root rcu_node. Rely on the fact that the | 377 | * Move tasks up to root rcu_node. Rely on the fact that the |
| @@ -361,8 +379,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, | |||
| 361 | * rcu_nodes in terms of gp_num value. This fact allows us to | 379 | * rcu_nodes in terms of gp_num value. This fact allows us to |
| 362 | * move the blocked_tasks[] array directly, element by element. | 380 | * move the blocked_tasks[] array directly, element by element. |
| 363 | */ | 381 | */ |
| 364 | retval = rcu_preempted_readers(rnp); | 382 | if (rcu_preempted_readers(rnp)) |
| 365 | for (i = 0; i < 2; i++) { | 383 | retval |= RCU_OFL_TASKS_NORM_GP; |
| 384 | if (rcu_preempted_readers_exp(rnp)) | ||
| 385 | retval |= RCU_OFL_TASKS_EXP_GP; | ||
| 386 | for (i = 0; i < 4; i++) { | ||
| 366 | lp = &rnp->blocked_tasks[i]; | 387 | lp = &rnp->blocked_tasks[i]; |
| 367 | lp_root = &rnp_root->blocked_tasks[i]; | 388 | lp_root = &rnp_root->blocked_tasks[i]; |
| 368 | while (!list_empty(lp)) { | 389 | while (!list_empty(lp)) { |
| @@ -449,14 +470,159 @@ void synchronize_rcu(void) | |||
| 449 | } | 470 | } |
| 450 | EXPORT_SYMBOL_GPL(synchronize_rcu); | 471 | EXPORT_SYMBOL_GPL(synchronize_rcu); |
| 451 | 472 | ||
| 473 | static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); | ||
| 474 | static long sync_rcu_preempt_exp_count; | ||
| 475 | static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); | ||
| 476 | |||
| 477 | /* | ||
| 478 | * Return non-zero if there are any tasks in RCU read-side critical | ||
| 479 | * sections blocking the current preemptible-RCU expedited grace period. | ||
| 480 | * If there is no preemptible-RCU expedited grace period currently in | ||
| 481 | * progress, returns zero unconditionally. | ||
| 482 | */ | ||
| 483 | static int rcu_preempted_readers_exp(struct rcu_node *rnp) | ||
| 484 | { | ||
| 485 | return !list_empty(&rnp->blocked_tasks[2]) || | ||
| 486 | !list_empty(&rnp->blocked_tasks[3]); | ||
| 487 | } | ||
| 488 | |||
| 489 | /* | ||
| 490 | * Return non-zero if there is no RCU expedited grace period in progress | ||
| 491 | * for the specified rcu_node structure, in other words, if all CPUs and | ||
| 492 | * tasks covered by the specified rcu_node structure have done their bit | ||
| 493 | * for the current expedited grace period. Works only for preemptible | ||
| 494 | * RCU -- other RCU implementations use other means. | ||
| 495 | * | ||
| 496 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
| 497 | */ | ||
| 498 | static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) | ||
| 499 | { | ||
| 500 | return !rcu_preempted_readers_exp(rnp) && | ||
| 501 | ACCESS_ONCE(rnp->expmask) == 0; | ||
| 502 | } | ||
| 503 | |||
| 504 | /* | ||
| 505 | * Report the exit from RCU read-side critical section for the last task | ||
| 506 | * that queued itself during or before the current expedited preemptible-RCU | ||
| 507 | * grace period. This event is reported either to the rcu_node structure on | ||
| 508 | * which the task was queued or to one of that rcu_node structure's ancestors, | ||
| 509 | * recursively up the tree. (Calm down, calm down, we do the recursion | ||
| 510 | * iteratively!) | ||
| 511 | * | ||
| 512 | * Caller must hold sync_rcu_preempt_exp_mutex. | ||
| 513 | */ | ||
| 514 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
| 515 | { | ||
| 516 | unsigned long flags; | ||
| 517 | unsigned long mask; | ||
| 518 | |||
| 519 | spin_lock_irqsave(&rnp->lock, flags); | ||
| 520 | for (;;) { | ||
| 521 | if (!sync_rcu_preempt_exp_done(rnp)) | ||
| 522 | break; | ||
| 523 | if (rnp->parent == NULL) { | ||
| 524 | wake_up(&sync_rcu_preempt_exp_wq); | ||
| 525 | break; | ||
| 526 | } | ||
| 527 | mask = rnp->grpmask; | ||
| 528 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
| 529 | rnp = rnp->parent; | ||
| 530 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
| 531 | rnp->expmask &= ~mask; | ||
| 532 | } | ||
| 533 | spin_unlock_irqrestore(&rnp->lock, flags); | ||
| 534 | } | ||
| 535 | |||
| 536 | /* | ||
| 537 | * Snapshot the tasks blocking the newly started preemptible-RCU expedited | ||
| 538 | * grace period for the specified rcu_node structure. If there are no such | ||
| 539 | * tasks, report it up the rcu_node hierarchy. | ||
| 540 | * | ||
| 541 | * Caller must hold sync_rcu_preempt_exp_mutex and rsp->onofflock. | ||
| 542 | */ | ||
| 543 | static void | ||
| 544 | sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) | ||
| 545 | { | ||
| 546 | int must_wait; | ||
| 547 | |||
| 548 | spin_lock(&rnp->lock); /* irqs already disabled */ | ||
| 549 | list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); | ||
| 550 | list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); | ||
| 551 | must_wait = rcu_preempted_readers_exp(rnp); | ||
| 552 | spin_unlock(&rnp->lock); /* irqs remain disabled */ | ||
| 553 | if (!must_wait) | ||
| 554 | rcu_report_exp_rnp(rsp, rnp); | ||
| 555 | } | ||
| 556 | |||
| 452 | /* | 557 | /* |
| 453 | * Wait for an rcu-preempt grace period. We are supposed to expedite the | 558 | * Wait for an rcu-preempt grace period, but expedite it. The basic idea |
| 454 | * grace period, but this is the crude slow compatibility hack, so just | 559 | * is to invoke synchronize_sched_expedited() to push all the tasks to |
| 455 | * invoke synchronize_rcu(). | 560 | * the ->blocked_tasks[] lists, move all entries from the first set of |
| 561 | * ->blocked_tasks[] lists to the second set, and finally wait for this | ||
| 562 | * second set to drain. | ||
| 456 | */ | 563 | */ |
| 457 | void synchronize_rcu_expedited(void) | 564 | void synchronize_rcu_expedited(void) |
| 458 | { | 565 | { |
| 459 | synchronize_rcu(); | 566 | unsigned long flags; |
| 567 | struct rcu_node *rnp; | ||
| 568 | struct rcu_state *rsp = &rcu_preempt_state; | ||
| 569 | long snap; | ||
| 570 | int trycount = 0; | ||
| 571 | |||
| 572 | smp_mb(); /* Caller's modifications seen first by other CPUs. */ | ||
| 573 | snap = ACCESS_ONCE(sync_rcu_preempt_exp_count) + 1; | ||
| 574 | smp_mb(); /* Above access cannot bleed into critical section. */ | ||
| 575 | |||
| 576 | /* | ||
| 577 | * Acquire lock, falling back to synchronize_rcu() if too many | ||
| 578 | * lock-acquisition failures. Of course, if someone does the | ||
| 579 | * expedited grace period for us, just leave. | ||
| 580 | */ | ||
| 581 | while (!mutex_trylock(&sync_rcu_preempt_exp_mutex)) { | ||
| 582 | if (trycount++ < 10) | ||
| 583 | udelay(trycount * num_online_cpus()); | ||
| 584 | else { | ||
| 585 | synchronize_rcu(); | ||
| 586 | return; | ||
| 587 | } | ||
| 588 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
| 589 | goto mb_ret; /* Others did our work for us. */ | ||
| 590 | } | ||
| 591 | if ((ACCESS_ONCE(sync_rcu_preempt_exp_count) - snap) > 0) | ||
| 592 | goto unlock_mb_ret; /* Others did our work for us. */ | ||
| 593 | |||
| 594 | /* force all RCU readers onto blocked_tasks[]. */ | ||
| 595 | synchronize_sched_expedited(); | ||
| 596 | |||
| 597 | spin_lock_irqsave(&rsp->onofflock, flags); | ||
| 598 | |||
| 599 | /* Initialize ->expmask for all non-leaf rcu_node structures. */ | ||
| 600 | rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { | ||
| 601 | spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
| 602 | rnp->expmask = rnp->qsmaskinit; | ||
| 603 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
| 604 | } | ||
| 605 | |||
| 606 | /* Snapshot current state of ->blocked_tasks[] lists. */ | ||
| 607 | rcu_for_each_leaf_node(rsp, rnp) | ||
| 608 | sync_rcu_preempt_exp_init(rsp, rnp); | ||
| 609 | if (NUM_RCU_NODES > 1) | ||
| 610 | sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); | ||
| 611 | |||
| 612 | spin_unlock_irqrestore(&rsp->onofflock, flags); | ||
| 613 | |||
| 614 | /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ | ||
| 615 | rnp = rcu_get_root(rsp); | ||
| 616 | wait_event(sync_rcu_preempt_exp_wq, | ||
| 617 | sync_rcu_preempt_exp_done(rnp)); | ||
| 618 | |||
| 619 | /* Clean up and exit. */ | ||
| 620 | smp_mb(); /* ensure expedited GP seen before counter increment. */ | ||
| 621 | ACCESS_ONCE(sync_rcu_preempt_exp_count)++; | ||
| 622 | unlock_mb_ret: | ||
| 623 | mutex_unlock(&sync_rcu_preempt_exp_mutex); | ||
| 624 | mb_ret: | ||
| 625 | smp_mb(); /* ensure subsequent action seen after grace period. */ | ||
| 460 | } | 626 | } |
| 461 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 627 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
| 462 | 628 | ||
| @@ -655,6 +821,20 @@ void synchronize_rcu_expedited(void) | |||
| 655 | } | 821 | } |
| 656 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | 822 | EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); |
| 657 | 823 | ||
| 824 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 825 | |||
| 826 | /* | ||
| 827 | * Because preemptible RCU does not exist, there is never any need to | ||
| 828 | * report on tasks preempted in RCU read-side critical sections during | ||
| 829 | * expedited RCU grace periods. | ||
| 830 | */ | ||
| 831 | static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) | ||
| 832 | { | ||
| 833 | return; | ||
| 834 | } | ||
| 835 | |||
| 836 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | ||
| 837 | |||
| 658 | /* | 838 | /* |
| 659 | * Because preemptible RCU does not exist, it never has any work to do. | 839 | * Because preemptible RCU does not exist, it never has any work to do. |
| 660 | */ | 840 | */ |
