Diffstat (limited to 'kernel')

 -rw-r--r--  kernel/rcupdate.c        |  28
 -rw-r--r--  kernel/rcutiny_plugin.h  |  16
 -rw-r--r--  kernel/rcutorture.c      | 257
 -rw-r--r--  kernel/rcutree.c         |  35
 -rw-r--r--  kernel/rcutree.h         |  11
 -rw-r--r--  kernel/rcutree_plugin.h  |  30
 -rw-r--r--  kernel/sched/core.c      |   1
 -rw-r--r--  kernel/srcu.c            | 548
 -rw-r--r--  kernel/timer.c           |   8

9 files changed, 752 insertions(+), 182 deletions(-)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a86f1741cc27..95cba41ce1e9 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
| @@ -51,6 +51,34 @@ | |||
| 51 | 51 | ||
| 52 | #include "rcu.h" | 52 | #include "rcu.h" |
| 53 | 53 | ||
| 54 | #ifdef CONFIG_PREEMPT_RCU | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Check for a task exiting while in a preemptible-RCU read-side | ||
| 58 | * critical section, clean up if so. No need to issue warnings, | ||
| 59 | * as debug_check_no_locks_held() already does this if lockdep | ||
| 60 | * is enabled. | ||
| 61 | */ | ||
| 62 | void exit_rcu(void) | ||
| 63 | { | ||
| 64 | struct task_struct *t = current; | ||
| 65 | |||
| 66 | if (likely(list_empty(¤t->rcu_node_entry))) | ||
| 67 | return; | ||
| 68 | t->rcu_read_lock_nesting = 1; | ||
| 69 | barrier(); | ||
| 70 | t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; | ||
| 71 | __rcu_read_unlock(); | ||
| 72 | } | ||
| 73 | |||
| 74 | #else /* #ifdef CONFIG_PREEMPT_RCU */ | ||
| 75 | |||
| 76 | void exit_rcu(void) | ||
| 77 | { | ||
| 78 | } | ||
| 79 | |||
| 80 | #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||
| 81 | |||
| 54 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 82 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
| 55 | static struct lock_class_key rcu_lock_key; | 83 | static struct lock_class_key rcu_lock_key; |
| 56 | struct lockdep_map rcu_lock_map = | 84 | struct lockdep_map rcu_lock_map = |
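The two preemptible-RCU copies of exit_rcu() (TINY and TREE, removed from rcutiny_plugin.h and rcutree_plugin.h below) are consolidated here, and the entry check is tightened from `t->rcu_read_lock_nesting == 0` to `list_empty(&current->rcu_node_entry)`, so the cleanup only runs for tasks actually queued as blocked readers. A minimal sketch of how the exit path is expected to use it; the call site itself is not part of this diff, so the surrounding function is illustrative only:

```c
/*
 * Illustrative only: the real call site is the task-exit path
 * (do_exit() in kernel/exit.c), which this patch does not touch.
 */
static void task_teardown_sketch(void)
{
	/*
	 * If the dying task is still inside rcu_read_lock(), exit_rcu()
	 * fakes up a blocked reader (nesting = 1, RCU_READ_UNLOCK_BLOCKED)
	 * and calls __rcu_read_unlock(), which removes the task from its
	 * rcu_node's blocked-reader list before the task disappears.
	 */
	exit_rcu();
}
```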
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index 22ecea0dfb62..fc31a2d65100 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
| @@ -851,22 +851,6 @@ int rcu_preempt_needs_cpu(void) | |||
| 851 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; | 851 | return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; |
| 852 | } | 852 | } |
| 853 | 853 | ||
| 854 | /* | ||
| 855 | * Check for a task exiting while in a preemptible -RCU read-side | ||
| 856 | * critical section, clean up if so. No need to issue warnings, | ||
| 857 | * as debug_check_no_locks_held() already does this if lockdep | ||
| 858 | * is enabled. | ||
| 859 | */ | ||
| 860 | void exit_rcu(void) | ||
| 861 | { | ||
| 862 | struct task_struct *t = current; | ||
| 863 | |||
| 864 | if (t->rcu_read_lock_nesting == 0) | ||
| 865 | return; | ||
| 866 | t->rcu_read_lock_nesting = 1; | ||
| 867 | __rcu_read_unlock(); | ||
| 868 | } | ||
| 869 | |||
| 870 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 854 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
| 871 | 855 | ||
| 872 | #ifdef CONFIG_RCU_TRACE | 856 | #ifdef CONFIG_RCU_TRACE |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index a89b381a8c6e..e66b34ab7555 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
| @@ -64,6 +64,7 @@ static int irqreader = 1; /* RCU readers from irq (timers). */ | |||
| 64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ | 64 | static int fqs_duration; /* Duration of bursts (us), 0 to disable. */ |
| 65 | static int fqs_holdoff; /* Hold time within burst (us). */ | 65 | static int fqs_holdoff; /* Hold time within burst (us). */ |
| 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ | 66 | static int fqs_stutter = 3; /* Wait time between bursts (s). */ |
| 67 | static int n_barrier_cbs; /* Number of callbacks to test RCU barriers. */ | ||
| 67 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ | 68 | static int onoff_interval; /* Wait time between CPU hotplugs, 0=disable. */ |
| 68 | static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ | 69 | static int onoff_holdoff; /* Seconds after boot before CPU hotplugs. */ |
| 69 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ | 70 | static int shutdown_secs; /* Shutdown time (s). <=0 for no shutdown. */ |
| @@ -96,6 +97,8 @@ module_param(fqs_holdoff, int, 0444); | |||
| 96 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); | 97 | MODULE_PARM_DESC(fqs_holdoff, "Holdoff time within fqs bursts (us)"); |
| 97 | module_param(fqs_stutter, int, 0444); | 98 | module_param(fqs_stutter, int, 0444); |
| 98 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); | 99 | MODULE_PARM_DESC(fqs_stutter, "Wait time between fqs bursts (s)"); |
| 100 | module_param(n_barrier_cbs, int, 0444); | ||
| 101 | MODULE_PARM_DESC(n_barrier_cbs, "# of callbacks/kthreads for barrier testing"); | ||
| 99 | module_param(onoff_interval, int, 0444); | 102 | module_param(onoff_interval, int, 0444); |
| 100 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); | 103 | MODULE_PARM_DESC(onoff_interval, "Time between CPU hotplugs (s), 0=disable"); |
| 101 | module_param(onoff_holdoff, int, 0444); | 104 | module_param(onoff_holdoff, int, 0444); |
| @@ -139,6 +142,8 @@ static struct task_struct *shutdown_task; | |||
| 139 | static struct task_struct *onoff_task; | 142 | static struct task_struct *onoff_task; |
| 140 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ | 143 | #endif /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 141 | static struct task_struct *stall_task; | 144 | static struct task_struct *stall_task; |
| 145 | static struct task_struct **barrier_cbs_tasks; | ||
| 146 | static struct task_struct *barrier_task; | ||
| 142 | 147 | ||
| 143 | #define RCU_TORTURE_PIPE_LEN 10 | 148 | #define RCU_TORTURE_PIPE_LEN 10 |
| 144 | 149 | ||
| @@ -164,6 +169,7 @@ static atomic_t n_rcu_torture_alloc_fail; | |||
| 164 | static atomic_t n_rcu_torture_free; | 169 | static atomic_t n_rcu_torture_free; |
| 165 | static atomic_t n_rcu_torture_mberror; | 170 | static atomic_t n_rcu_torture_mberror; |
| 166 | static atomic_t n_rcu_torture_error; | 171 | static atomic_t n_rcu_torture_error; |
| 172 | static long n_rcu_torture_barrier_error; | ||
| 167 | static long n_rcu_torture_boost_ktrerror; | 173 | static long n_rcu_torture_boost_ktrerror; |
| 168 | static long n_rcu_torture_boost_rterror; | 174 | static long n_rcu_torture_boost_rterror; |
| 169 | static long n_rcu_torture_boost_failure; | 175 | static long n_rcu_torture_boost_failure; |
| @@ -173,6 +179,8 @@ static long n_offline_attempts; | |||
| 173 | static long n_offline_successes; | 179 | static long n_offline_successes; |
| 174 | static long n_online_attempts; | 180 | static long n_online_attempts; |
| 175 | static long n_online_successes; | 181 | static long n_online_successes; |
| 182 | static long n_barrier_attempts; | ||
| 183 | static long n_barrier_successes; | ||
| 176 | static struct list_head rcu_torture_removed; | 184 | static struct list_head rcu_torture_removed; |
| 177 | static cpumask_var_t shuffle_tmp_mask; | 185 | static cpumask_var_t shuffle_tmp_mask; |
| 178 | 186 | ||
| @@ -197,6 +205,10 @@ static unsigned long shutdown_time; /* jiffies to system shutdown. */ | |||
| 197 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ | 205 | static unsigned long boost_starttime; /* jiffies of next boost test start. */ |
| 198 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ | 206 | DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ |
| 199 | /* and boost task create/destroy. */ | 207 | /* and boost task create/destroy. */ |
| 208 | static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ | ||
| 209 | static atomic_t barrier_cbs_invoked; /* Barrier callbacks invoked. */ | ||
| 210 | static wait_queue_head_t *barrier_cbs_wq; /* Coordinate barrier testing. */ | ||
| 211 | static DECLARE_WAIT_QUEUE_HEAD(barrier_wq); | ||
| 200 | 212 | ||
| 201 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ | 213 | /* Mediate rmmod and system shutdown. Concurrent rmmod & shutdown illegal! */ |
| 202 | 214 | ||
| @@ -327,6 +339,7 @@ struct rcu_torture_ops { | |||
| 327 | int (*completed)(void); | 339 | int (*completed)(void); |
| 328 | void (*deferred_free)(struct rcu_torture *p); | 340 | void (*deferred_free)(struct rcu_torture *p); |
| 329 | void (*sync)(void); | 341 | void (*sync)(void); |
| 342 | void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); | ||
| 330 | void (*cb_barrier)(void); | 343 | void (*cb_barrier)(void); |
| 331 | void (*fqs)(void); | 344 | void (*fqs)(void); |
| 332 | int (*stats)(char *page); | 345 | int (*stats)(char *page); |
| @@ -417,6 +430,7 @@ static struct rcu_torture_ops rcu_ops = { | |||
| 417 | .completed = rcu_torture_completed, | 430 | .completed = rcu_torture_completed, |
| 418 | .deferred_free = rcu_torture_deferred_free, | 431 | .deferred_free = rcu_torture_deferred_free, |
| 419 | .sync = synchronize_rcu, | 432 | .sync = synchronize_rcu, |
| 433 | .call = call_rcu, | ||
| 420 | .cb_barrier = rcu_barrier, | 434 | .cb_barrier = rcu_barrier, |
| 421 | .fqs = rcu_force_quiescent_state, | 435 | .fqs = rcu_force_quiescent_state, |
| 422 | .stats = NULL, | 436 | .stats = NULL, |
| @@ -460,6 +474,7 @@ static struct rcu_torture_ops rcu_sync_ops = { | |||
| 460 | .completed = rcu_torture_completed, | 474 | .completed = rcu_torture_completed, |
| 461 | .deferred_free = rcu_sync_torture_deferred_free, | 475 | .deferred_free = rcu_sync_torture_deferred_free, |
| 462 | .sync = synchronize_rcu, | 476 | .sync = synchronize_rcu, |
| 477 | .call = NULL, | ||
| 463 | .cb_barrier = NULL, | 478 | .cb_barrier = NULL, |
| 464 | .fqs = rcu_force_quiescent_state, | 479 | .fqs = rcu_force_quiescent_state, |
| 465 | .stats = NULL, | 480 | .stats = NULL, |
| @@ -477,6 +492,7 @@ static struct rcu_torture_ops rcu_expedited_ops = { | |||
| 477 | .completed = rcu_no_completed, | 492 | .completed = rcu_no_completed, |
| 478 | .deferred_free = rcu_sync_torture_deferred_free, | 493 | .deferred_free = rcu_sync_torture_deferred_free, |
| 479 | .sync = synchronize_rcu_expedited, | 494 | .sync = synchronize_rcu_expedited, |
| 495 | .call = NULL, | ||
| 480 | .cb_barrier = NULL, | 496 | .cb_barrier = NULL, |
| 481 | .fqs = rcu_force_quiescent_state, | 497 | .fqs = rcu_force_quiescent_state, |
| 482 | .stats = NULL, | 498 | .stats = NULL, |
| @@ -519,6 +535,7 @@ static struct rcu_torture_ops rcu_bh_ops = { | |||
| 519 | .completed = rcu_bh_torture_completed, | 535 | .completed = rcu_bh_torture_completed, |
| 520 | .deferred_free = rcu_bh_torture_deferred_free, | 536 | .deferred_free = rcu_bh_torture_deferred_free, |
| 521 | .sync = synchronize_rcu_bh, | 537 | .sync = synchronize_rcu_bh, |
| 538 | .call = call_rcu_bh, | ||
| 522 | .cb_barrier = rcu_barrier_bh, | 539 | .cb_barrier = rcu_barrier_bh, |
| 523 | .fqs = rcu_bh_force_quiescent_state, | 540 | .fqs = rcu_bh_force_quiescent_state, |
| 524 | .stats = NULL, | 541 | .stats = NULL, |
| @@ -535,6 +552,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = { | |||
| 535 | .completed = rcu_bh_torture_completed, | 552 | .completed = rcu_bh_torture_completed, |
| 536 | .deferred_free = rcu_sync_torture_deferred_free, | 553 | .deferred_free = rcu_sync_torture_deferred_free, |
| 537 | .sync = synchronize_rcu_bh, | 554 | .sync = synchronize_rcu_bh, |
| 555 | .call = NULL, | ||
| 538 | .cb_barrier = NULL, | 556 | .cb_barrier = NULL, |
| 539 | .fqs = rcu_bh_force_quiescent_state, | 557 | .fqs = rcu_bh_force_quiescent_state, |
| 540 | .stats = NULL, | 558 | .stats = NULL, |
| @@ -551,6 +569,7 @@ static struct rcu_torture_ops rcu_bh_expedited_ops = { | |||
| 551 | .completed = rcu_bh_torture_completed, | 569 | .completed = rcu_bh_torture_completed, |
| 552 | .deferred_free = rcu_sync_torture_deferred_free, | 570 | .deferred_free = rcu_sync_torture_deferred_free, |
| 553 | .sync = synchronize_rcu_bh_expedited, | 571 | .sync = synchronize_rcu_bh_expedited, |
| 572 | .call = NULL, | ||
| 554 | .cb_barrier = NULL, | 573 | .cb_barrier = NULL, |
| 555 | .fqs = rcu_bh_force_quiescent_state, | 574 | .fqs = rcu_bh_force_quiescent_state, |
| 556 | .stats = NULL, | 575 | .stats = NULL, |
| @@ -606,6 +625,11 @@ static int srcu_torture_completed(void) | |||
| 606 | return srcu_batches_completed(&srcu_ctl); | 625 | return srcu_batches_completed(&srcu_ctl); |
| 607 | } | 626 | } |
| 608 | 627 | ||
| 628 | static void srcu_torture_deferred_free(struct rcu_torture *rp) | ||
| 629 | { | ||
| 630 | call_srcu(&srcu_ctl, &rp->rtort_rcu, rcu_torture_cb); | ||
| 631 | } | ||
| 632 | |||
| 609 | static void srcu_torture_synchronize(void) | 633 | static void srcu_torture_synchronize(void) |
| 610 | { | 634 | { |
| 611 | synchronize_srcu(&srcu_ctl); | 635 | synchronize_srcu(&srcu_ctl); |
| @@ -620,7 +644,7 @@ static int srcu_torture_stats(char *page) | |||
| 620 | cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", | 644 | cnt += sprintf(&page[cnt], "%s%s per-CPU(idx=%d):", |
| 621 | torture_type, TORTURE_FLAG, idx); | 645 | torture_type, TORTURE_FLAG, idx); |
| 622 | for_each_possible_cpu(cpu) { | 646 | for_each_possible_cpu(cpu) { |
| 623 | cnt += sprintf(&page[cnt], " %d(%d,%d)", cpu, | 647 | cnt += sprintf(&page[cnt], " %d(%lu,%lu)", cpu, |
| 624 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], | 648 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx], |
| 625 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); | 649 | per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]); |
| 626 | } | 650 | } |
| @@ -635,13 +659,29 @@ static struct rcu_torture_ops srcu_ops = { | |||
| 635 | .read_delay = srcu_read_delay, | 659 | .read_delay = srcu_read_delay, |
| 636 | .readunlock = srcu_torture_read_unlock, | 660 | .readunlock = srcu_torture_read_unlock, |
| 637 | .completed = srcu_torture_completed, | 661 | .completed = srcu_torture_completed, |
| 638 | .deferred_free = rcu_sync_torture_deferred_free, | 662 | .deferred_free = srcu_torture_deferred_free, |
| 639 | .sync = srcu_torture_synchronize, | 663 | .sync = srcu_torture_synchronize, |
| 664 | .call = NULL, | ||
| 640 | .cb_barrier = NULL, | 665 | .cb_barrier = NULL, |
| 641 | .stats = srcu_torture_stats, | 666 | .stats = srcu_torture_stats, |
| 642 | .name = "srcu" | 667 | .name = "srcu" |
| 643 | }; | 668 | }; |
| 644 | 669 | ||
| 670 | static struct rcu_torture_ops srcu_sync_ops = { | ||
| 671 | .init = srcu_torture_init, | ||
| 672 | .cleanup = srcu_torture_cleanup, | ||
| 673 | .readlock = srcu_torture_read_lock, | ||
| 674 | .read_delay = srcu_read_delay, | ||
| 675 | .readunlock = srcu_torture_read_unlock, | ||
| 676 | .completed = srcu_torture_completed, | ||
| 677 | .deferred_free = rcu_sync_torture_deferred_free, | ||
| 678 | .sync = srcu_torture_synchronize, | ||
| 679 | .call = NULL, | ||
| 680 | .cb_barrier = NULL, | ||
| 681 | .stats = srcu_torture_stats, | ||
| 682 | .name = "srcu_sync" | ||
| 683 | }; | ||
| 684 | |||
| 645 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) | 685 | static int srcu_torture_read_lock_raw(void) __acquires(&srcu_ctl) |
| 646 | { | 686 | { |
| 647 | return srcu_read_lock_raw(&srcu_ctl); | 687 | return srcu_read_lock_raw(&srcu_ctl); |
| @@ -659,13 +699,29 @@ static struct rcu_torture_ops srcu_raw_ops = { | |||
| 659 | .read_delay = srcu_read_delay, | 699 | .read_delay = srcu_read_delay, |
| 660 | .readunlock = srcu_torture_read_unlock_raw, | 700 | .readunlock = srcu_torture_read_unlock_raw, |
| 661 | .completed = srcu_torture_completed, | 701 | .completed = srcu_torture_completed, |
| 662 | .deferred_free = rcu_sync_torture_deferred_free, | 702 | .deferred_free = srcu_torture_deferred_free, |
| 663 | .sync = srcu_torture_synchronize, | 703 | .sync = srcu_torture_synchronize, |
| 704 | .call = NULL, | ||
| 664 | .cb_barrier = NULL, | 705 | .cb_barrier = NULL, |
| 665 | .stats = srcu_torture_stats, | 706 | .stats = srcu_torture_stats, |
| 666 | .name = "srcu_raw" | 707 | .name = "srcu_raw" |
| 667 | }; | 708 | }; |
| 668 | 709 | ||
| 710 | static struct rcu_torture_ops srcu_raw_sync_ops = { | ||
| 711 | .init = srcu_torture_init, | ||
| 712 | .cleanup = srcu_torture_cleanup, | ||
| 713 | .readlock = srcu_torture_read_lock_raw, | ||
| 714 | .read_delay = srcu_read_delay, | ||
| 715 | .readunlock = srcu_torture_read_unlock_raw, | ||
| 716 | .completed = srcu_torture_completed, | ||
| 717 | .deferred_free = rcu_sync_torture_deferred_free, | ||
| 718 | .sync = srcu_torture_synchronize, | ||
| 719 | .call = NULL, | ||
| 720 | .cb_barrier = NULL, | ||
| 721 | .stats = srcu_torture_stats, | ||
| 722 | .name = "srcu_raw_sync" | ||
| 723 | }; | ||
| 724 | |||
| 669 | static void srcu_torture_synchronize_expedited(void) | 725 | static void srcu_torture_synchronize_expedited(void) |
| 670 | { | 726 | { |
| 671 | synchronize_srcu_expedited(&srcu_ctl); | 727 | synchronize_srcu_expedited(&srcu_ctl); |
| @@ -680,6 +736,7 @@ static struct rcu_torture_ops srcu_expedited_ops = { | |||
| 680 | .completed = srcu_torture_completed, | 736 | .completed = srcu_torture_completed, |
| 681 | .deferred_free = rcu_sync_torture_deferred_free, | 737 | .deferred_free = rcu_sync_torture_deferred_free, |
| 682 | .sync = srcu_torture_synchronize_expedited, | 738 | .sync = srcu_torture_synchronize_expedited, |
| 739 | .call = NULL, | ||
| 683 | .cb_barrier = NULL, | 740 | .cb_barrier = NULL, |
| 684 | .stats = srcu_torture_stats, | 741 | .stats = srcu_torture_stats, |
| 685 | .name = "srcu_expedited" | 742 | .name = "srcu_expedited" |
| @@ -1129,7 +1186,8 @@ rcu_torture_printk(char *page) | |||
| 1129 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " | 1186 | "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d " |
| 1130 | "rtmbe: %d rtbke: %ld rtbre: %ld " | 1187 | "rtmbe: %d rtbke: %ld rtbre: %ld " |
| 1131 | "rtbf: %ld rtb: %ld nt: %ld " | 1188 | "rtbf: %ld rtb: %ld nt: %ld " |
| 1132 | "onoff: %ld/%ld:%ld/%ld", | 1189 | "onoff: %ld/%ld:%ld/%ld " |
| 1190 | "barrier: %ld/%ld:%ld", | ||
| 1133 | rcu_torture_current, | 1191 | rcu_torture_current, |
| 1134 | rcu_torture_current_version, | 1192 | rcu_torture_current_version, |
| 1135 | list_empty(&rcu_torture_freelist), | 1193 | list_empty(&rcu_torture_freelist), |
| @@ -1145,14 +1203,17 @@ rcu_torture_printk(char *page) | |||
| 1145 | n_online_successes, | 1203 | n_online_successes, |
| 1146 | n_online_attempts, | 1204 | n_online_attempts, |
| 1147 | n_offline_successes, | 1205 | n_offline_successes, |
| 1148 | n_offline_attempts); | 1206 | n_offline_attempts, |
| 1207 | n_barrier_successes, | ||
| 1208 | n_barrier_attempts, | ||
| 1209 | n_rcu_torture_barrier_error); | ||
| 1210 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | ||
| 1149 | if (atomic_read(&n_rcu_torture_mberror) != 0 || | 1211 | if (atomic_read(&n_rcu_torture_mberror) != 0 || |
| 1212 | n_rcu_torture_barrier_error != 0 || | ||
| 1150 | n_rcu_torture_boost_ktrerror != 0 || | 1213 | n_rcu_torture_boost_ktrerror != 0 || |
| 1151 | n_rcu_torture_boost_rterror != 0 || | 1214 | n_rcu_torture_boost_rterror != 0 || |
| 1152 | n_rcu_torture_boost_failure != 0) | 1215 | n_rcu_torture_boost_failure != 0 || |
| 1153 | cnt += sprintf(&page[cnt], " !!!"); | 1216 | i > 1) { |
| 1154 | cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); | ||
| 1155 | if (i > 1) { | ||
| 1156 | cnt += sprintf(&page[cnt], "!!! "); | 1217 | cnt += sprintf(&page[cnt], "!!! "); |
| 1157 | atomic_inc(&n_rcu_torture_error); | 1218 | atomic_inc(&n_rcu_torture_error); |
| 1158 | WARN_ON_ONCE(1); | 1219 | WARN_ON_ONCE(1); |
| @@ -1337,6 +1398,7 @@ static void rcutorture_booster_cleanup(int cpu) | |||
| 1337 | 1398 | ||
| 1338 | /* This must be outside of the mutex, otherwise deadlock! */ | 1399 | /* This must be outside of the mutex, otherwise deadlock! */ |
| 1339 | kthread_stop(t); | 1400 | kthread_stop(t); |
| 1401 | boost_tasks[cpu] = NULL; | ||
| 1340 | } | 1402 | } |
| 1341 | 1403 | ||
| 1342 | static int rcutorture_booster_init(int cpu) | 1404 | static int rcutorture_booster_init(int cpu) |
| @@ -1484,13 +1546,15 @@ static void rcu_torture_onoff_cleanup(void) | |||
| 1484 | return; | 1546 | return; |
| 1485 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); | 1547 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_onoff task"); |
| 1486 | kthread_stop(onoff_task); | 1548 | kthread_stop(onoff_task); |
| 1549 | onoff_task = NULL; | ||
| 1487 | } | 1550 | } |
| 1488 | 1551 | ||
| 1489 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ | 1552 | #else /* #ifdef CONFIG_HOTPLUG_CPU */ |
| 1490 | 1553 | ||
| 1491 | static void | 1554 | static int |
| 1492 | rcu_torture_onoff_init(void) | 1555 | rcu_torture_onoff_init(void) |
| 1493 | { | 1556 | { |
| 1557 | return 0; | ||
| 1494 | } | 1558 | } |
| 1495 | 1559 | ||
| 1496 | static void rcu_torture_onoff_cleanup(void) | 1560 | static void rcu_torture_onoff_cleanup(void) |
| @@ -1554,6 +1618,152 @@ static void rcu_torture_stall_cleanup(void) | |||
| 1554 | return; | 1618 | return; |
| 1555 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); | 1619 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stall_task."); |
| 1556 | kthread_stop(stall_task); | 1620 | kthread_stop(stall_task); |
| 1621 | stall_task = NULL; | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | /* Callback function for RCU barrier testing. */ | ||
| 1625 | void rcu_torture_barrier_cbf(struct rcu_head *rcu) | ||
| 1626 | { | ||
| 1627 | atomic_inc(&barrier_cbs_invoked); | ||
| 1628 | } | ||
| 1629 | |||
| 1630 | /* kthread function to register callbacks used to test RCU barriers. */ | ||
| 1631 | static int rcu_torture_barrier_cbs(void *arg) | ||
| 1632 | { | ||
| 1633 | long myid = (long)arg; | ||
| 1634 | struct rcu_head rcu; | ||
| 1635 | |||
| 1636 | init_rcu_head_on_stack(&rcu); | ||
| 1637 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task started"); | ||
| 1638 | set_user_nice(current, 19); | ||
| 1639 | do { | ||
| 1640 | wait_event(barrier_cbs_wq[myid], | ||
| 1641 | atomic_read(&barrier_cbs_count) == n_barrier_cbs || | ||
| 1642 | kthread_should_stop() || | ||
| 1643 | fullstop != FULLSTOP_DONTSTOP); | ||
| 1644 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | ||
| 1645 | break; | ||
| 1646 | cur_ops->call(&rcu, rcu_torture_barrier_cbf); | ||
| 1647 | if (atomic_dec_and_test(&barrier_cbs_count)) | ||
| 1648 | wake_up(&barrier_wq); | ||
| 1649 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
| 1650 | VERBOSE_PRINTK_STRING("rcu_torture_barrier_cbs task stopping"); | ||
| 1651 | rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); | ||
| 1652 | while (!kthread_should_stop()) | ||
| 1653 | schedule_timeout_interruptible(1); | ||
| 1654 | cur_ops->cb_barrier(); | ||
| 1655 | destroy_rcu_head_on_stack(&rcu); | ||
| 1656 | return 0; | ||
| 1657 | } | ||
| 1658 | |||
| 1659 | /* kthread function to drive and coordinate RCU barrier testing. */ | ||
| 1660 | static int rcu_torture_barrier(void *arg) | ||
| 1661 | { | ||
| 1662 | int i; | ||
| 1663 | |||
| 1664 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task starting"); | ||
| 1665 | do { | ||
| 1666 | atomic_set(&barrier_cbs_invoked, 0); | ||
| 1667 | atomic_set(&barrier_cbs_count, n_barrier_cbs); | ||
| 1668 | /* wake_up() path contains the required barriers. */ | ||
| 1669 | for (i = 0; i < n_barrier_cbs; i++) | ||
| 1670 | wake_up(&barrier_cbs_wq[i]); | ||
| 1671 | wait_event(barrier_wq, | ||
| 1672 | atomic_read(&barrier_cbs_count) == 0 || | ||
| 1673 | kthread_should_stop() || | ||
| 1674 | fullstop != FULLSTOP_DONTSTOP); | ||
| 1675 | if (kthread_should_stop() || fullstop != FULLSTOP_DONTSTOP) | ||
| 1676 | break; | ||
| 1677 | n_barrier_attempts++; | ||
| 1678 | cur_ops->cb_barrier(); | ||
| 1679 | if (atomic_read(&barrier_cbs_invoked) != n_barrier_cbs) { | ||
| 1680 | n_rcu_torture_barrier_error++; | ||
| 1681 | WARN_ON_ONCE(1); | ||
| 1682 | } | ||
| 1683 | n_barrier_successes++; | ||
| 1684 | schedule_timeout_interruptible(HZ / 10); | ||
| 1685 | } while (!kthread_should_stop() && fullstop == FULLSTOP_DONTSTOP); | ||
| 1686 | VERBOSE_PRINTK_STRING("rcu_torture_barrier task stopping"); | ||
| 1687 | rcutorture_shutdown_absorb("rcu_torture_barrier_cbs"); | ||
| 1688 | while (!kthread_should_stop()) | ||
| 1689 | schedule_timeout_interruptible(1); | ||
| 1690 | return 0; | ||
| 1691 | } | ||
| 1692 | |||
| 1693 | /* Initialize RCU barrier testing. */ | ||
| 1694 | static int rcu_torture_barrier_init(void) | ||
| 1695 | { | ||
| 1696 | int i; | ||
| 1697 | int ret; | ||
| 1698 | |||
| 1699 | if (n_barrier_cbs == 0) | ||
| 1700 | return 0; | ||
| 1701 | if (cur_ops->call == NULL || cur_ops->cb_barrier == NULL) { | ||
| 1702 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
| 1703 | " Call or barrier ops missing for %s,\n", | ||
| 1704 | torture_type, cur_ops->name); | ||
| 1705 | printk(KERN_ALERT "%s" TORTURE_FLAG | ||
| 1706 | " RCU barrier testing omitted from run.\n", | ||
| 1707 | torture_type); | ||
| 1708 | return 0; | ||
| 1709 | } | ||
| 1710 | atomic_set(&barrier_cbs_count, 0); | ||
| 1711 | atomic_set(&barrier_cbs_invoked, 0); | ||
| 1712 | barrier_cbs_tasks = | ||
| 1713 | kzalloc(n_barrier_cbs * sizeof(barrier_cbs_tasks[0]), | ||
| 1714 | GFP_KERNEL); | ||
| 1715 | barrier_cbs_wq = | ||
| 1716 | kzalloc(n_barrier_cbs * sizeof(barrier_cbs_wq[0]), | ||
| 1717 | GFP_KERNEL); | ||
| 1718 | if (barrier_cbs_tasks == NULL || barrier_cbs_wq == 0) | ||
| 1719 | return -ENOMEM; | ||
| 1720 | for (i = 0; i < n_barrier_cbs; i++) { | ||
| 1721 | init_waitqueue_head(&barrier_cbs_wq[i]); | ||
| 1722 | barrier_cbs_tasks[i] = kthread_run(rcu_torture_barrier_cbs, | ||
| 1723 | (void *)(long)i, | ||
| 1724 | "rcu_torture_barrier_cbs"); | ||
| 1725 | if (IS_ERR(barrier_cbs_tasks[i])) { | ||
| 1726 | ret = PTR_ERR(barrier_cbs_tasks[i]); | ||
| 1727 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier_cbs"); | ||
| 1728 | barrier_cbs_tasks[i] = NULL; | ||
| 1729 | return ret; | ||
| 1730 | } | ||
| 1731 | } | ||
| 1732 | barrier_task = kthread_run(rcu_torture_barrier, NULL, | ||
| 1733 | "rcu_torture_barrier"); | ||
| 1734 | if (IS_ERR(barrier_task)) { | ||
| 1735 | ret = PTR_ERR(barrier_task); | ||
| 1736 | VERBOSE_PRINTK_ERRSTRING("Failed to create rcu_torture_barrier"); | ||
| 1737 | barrier_task = NULL; | ||
| 1738 | } | ||
| 1739 | return 0; | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | /* Clean up after RCU barrier testing. */ | ||
| 1743 | static void rcu_torture_barrier_cleanup(void) | ||
| 1744 | { | ||
| 1745 | int i; | ||
| 1746 | |||
| 1747 | if (barrier_task != NULL) { | ||
| 1748 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier task"); | ||
| 1749 | kthread_stop(barrier_task); | ||
| 1750 | barrier_task = NULL; | ||
| 1751 | } | ||
| 1752 | if (barrier_cbs_tasks != NULL) { | ||
| 1753 | for (i = 0; i < n_barrier_cbs; i++) { | ||
| 1754 | if (barrier_cbs_tasks[i] != NULL) { | ||
| 1755 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_barrier_cbs task"); | ||
| 1756 | kthread_stop(barrier_cbs_tasks[i]); | ||
| 1757 | barrier_cbs_tasks[i] = NULL; | ||
| 1758 | } | ||
| 1759 | } | ||
| 1760 | kfree(barrier_cbs_tasks); | ||
| 1761 | barrier_cbs_tasks = NULL; | ||
| 1762 | } | ||
| 1763 | if (barrier_cbs_wq != NULL) { | ||
| 1764 | kfree(barrier_cbs_wq); | ||
| 1765 | barrier_cbs_wq = NULL; | ||
| 1766 | } | ||
| 1557 | } | 1767 | } |
| 1558 | 1768 | ||
| 1559 | static int rcutorture_cpu_notify(struct notifier_block *self, | 1769 | static int rcutorture_cpu_notify(struct notifier_block *self, |
| @@ -1598,6 +1808,7 @@ rcu_torture_cleanup(void) | |||
| 1598 | fullstop = FULLSTOP_RMMOD; | 1808 | fullstop = FULLSTOP_RMMOD; |
| 1599 | mutex_unlock(&fullstop_mutex); | 1809 | mutex_unlock(&fullstop_mutex); |
| 1600 | unregister_reboot_notifier(&rcutorture_shutdown_nb); | 1810 | unregister_reboot_notifier(&rcutorture_shutdown_nb); |
| 1811 | rcu_torture_barrier_cleanup(); | ||
| 1601 | rcu_torture_stall_cleanup(); | 1812 | rcu_torture_stall_cleanup(); |
| 1602 | if (stutter_task) { | 1813 | if (stutter_task) { |
| 1603 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); | 1814 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task"); |
| @@ -1665,6 +1876,7 @@ rcu_torture_cleanup(void) | |||
| 1665 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); | 1876 | VERBOSE_PRINTK_STRING("Stopping rcu_torture_shutdown task"); |
| 1666 | kthread_stop(shutdown_task); | 1877 | kthread_stop(shutdown_task); |
| 1667 | } | 1878 | } |
| 1879 | shutdown_task = NULL; | ||
| 1668 | rcu_torture_onoff_cleanup(); | 1880 | rcu_torture_onoff_cleanup(); |
| 1669 | 1881 | ||
| 1670 | /* Wait for all RCU callbacks to fire. */ | 1882 | /* Wait for all RCU callbacks to fire. */ |
| @@ -1676,7 +1888,7 @@ rcu_torture_cleanup(void) | |||
| 1676 | 1888 | ||
| 1677 | if (cur_ops->cleanup) | 1889 | if (cur_ops->cleanup) |
| 1678 | cur_ops->cleanup(); | 1890 | cur_ops->cleanup(); |
| 1679 | if (atomic_read(&n_rcu_torture_error)) | 1891 | if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) |
| 1680 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); | 1892 | rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); |
| 1681 | else if (n_online_successes != n_online_attempts || | 1893 | else if (n_online_successes != n_online_attempts || |
| 1682 | n_offline_successes != n_offline_attempts) | 1894 | n_offline_successes != n_offline_attempts) |
| @@ -1692,10 +1904,12 @@ rcu_torture_init(void) | |||
| 1692 | int i; | 1904 | int i; |
| 1693 | int cpu; | 1905 | int cpu; |
| 1694 | int firsterr = 0; | 1906 | int firsterr = 0; |
| 1907 | int retval; | ||
| 1695 | static struct rcu_torture_ops *torture_ops[] = | 1908 | static struct rcu_torture_ops *torture_ops[] = |
| 1696 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, | 1909 | { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, |
| 1697 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, | 1910 | &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops, |
| 1698 | &srcu_ops, &srcu_raw_ops, &srcu_expedited_ops, | 1911 | &srcu_ops, &srcu_sync_ops, &srcu_raw_ops, |
| 1912 | &srcu_raw_sync_ops, &srcu_expedited_ops, | ||
| 1699 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; | 1913 | &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; |
| 1700 | 1914 | ||
| 1701 | mutex_lock(&fullstop_mutex); | 1915 | mutex_lock(&fullstop_mutex); |
| @@ -1749,6 +1963,7 @@ rcu_torture_init(void) | |||
| 1749 | atomic_set(&n_rcu_torture_free, 0); | 1963 | atomic_set(&n_rcu_torture_free, 0); |
| 1750 | atomic_set(&n_rcu_torture_mberror, 0); | 1964 | atomic_set(&n_rcu_torture_mberror, 0); |
| 1751 | atomic_set(&n_rcu_torture_error, 0); | 1965 | atomic_set(&n_rcu_torture_error, 0); |
| 1966 | n_rcu_torture_barrier_error = 0; | ||
| 1752 | n_rcu_torture_boost_ktrerror = 0; | 1967 | n_rcu_torture_boost_ktrerror = 0; |
| 1753 | n_rcu_torture_boost_rterror = 0; | 1968 | n_rcu_torture_boost_rterror = 0; |
| 1754 | n_rcu_torture_boost_failure = 0; | 1969 | n_rcu_torture_boost_failure = 0; |
| @@ -1872,7 +2087,6 @@ rcu_torture_init(void) | |||
| 1872 | test_boost_duration = 2; | 2087 | test_boost_duration = 2; |
| 1873 | if ((test_boost == 1 && cur_ops->can_boost) || | 2088 | if ((test_boost == 1 && cur_ops->can_boost) || |
| 1874 | test_boost == 2) { | 2089 | test_boost == 2) { |
| 1875 | int retval; | ||
| 1876 | 2090 | ||
| 1877 | boost_starttime = jiffies + test_boost_interval * HZ; | 2091 | boost_starttime = jiffies + test_boost_interval * HZ; |
| 1878 | register_cpu_notifier(&rcutorture_cpu_nb); | 2092 | register_cpu_notifier(&rcutorture_cpu_nb); |
| @@ -1897,9 +2111,22 @@ rcu_torture_init(void) | |||
| 1897 | goto unwind; | 2111 | goto unwind; |
| 1898 | } | 2112 | } |
| 1899 | } | 2113 | } |
| 1900 | rcu_torture_onoff_init(); | 2114 | i = rcu_torture_onoff_init(); |
| 2115 | if (i != 0) { | ||
| 2116 | firsterr = i; | ||
| 2117 | goto unwind; | ||
| 2118 | } | ||
| 1901 | register_reboot_notifier(&rcutorture_shutdown_nb); | 2119 | register_reboot_notifier(&rcutorture_shutdown_nb); |
| 1902 | rcu_torture_stall_init(); | 2120 | i = rcu_torture_stall_init(); |
| 2121 | if (i != 0) { | ||
| 2122 | firsterr = i; | ||
| 2123 | goto unwind; | ||
| 2124 | } | ||
| 2125 | retval = rcu_torture_barrier_init(); | ||
| 2126 | if (retval != 0) { | ||
| 2127 | firsterr = retval; | ||
| 2128 | goto unwind; | ||
| 2129 | } | ||
| 1903 | rcutorture_record_test_transition(); | 2130 | rcutorture_record_test_transition(); |
| 1904 | mutex_unlock(&fullstop_mutex); | 2131 | mutex_unlock(&fullstop_mutex); |
| 1905 | return 0; | 2132 | return 0; |
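The bulk of the rcutorture changes add an RCU-barrier test: each of the `n_barrier_cbs` kthreads posts one callback through the new `->call()` method, the driver thread then invokes `->cb_barrier()` (for example rcu_barrier()), and every callback posted beforehand must already have been invoked. A condensed sketch of that invariant, assuming the rcutorture context above; the two wait helpers are hypothetical stand-ins for the wait_event()/wake_up() handshake between the kthreads:

```c
static void barrier_round_sketch(void)
{
	atomic_set(&barrier_cbs_invoked, 0);
	atomic_set(&barrier_cbs_count, n_barrier_cbs);

	kick_cb_poster_kthreads();	/* hypothetical: wake each rcu_torture_barrier_cbs() thread */
	wait_until_all_cbs_posted();	/* hypothetical: wait for barrier_cbs_count to hit zero */

	cur_ops->cb_barrier();		/* e.g. rcu_barrier() or rcu_barrier_bh() */

	/* Everything posted before cb_barrier() returned must have run by now. */
	WARN_ON_ONCE(atomic_read(&barrier_cbs_invoked) != n_barrier_cbs);
}
```

The SRCU flavors are also split: srcu_ops and srcu_raw_ops now exercise the call_srcu()-based srcu_torture_deferred_free(), while the new srcu_sync_ops and srcu_raw_sync_ops keep the old synchronous deferred-free path.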
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e578dd327c64..b3ea3ac3a2b5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
| @@ -201,7 +201,6 @@ void rcu_note_context_switch(int cpu) | |||
| 201 | { | 201 | { |
| 202 | trace_rcu_utilization("Start context switch"); | 202 | trace_rcu_utilization("Start context switch"); |
| 203 | rcu_sched_qs(cpu); | 203 | rcu_sched_qs(cpu); |
| 204 | rcu_preempt_note_context_switch(cpu); | ||
| 205 | trace_rcu_utilization("End context switch"); | 204 | trace_rcu_utilization("End context switch"); |
| 206 | } | 205 | } |
| 207 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 206 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
| @@ -1953,6 +1952,38 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
| 1953 | } | 1952 | } |
| 1954 | EXPORT_SYMBOL_GPL(call_rcu_bh); | 1953 | EXPORT_SYMBOL_GPL(call_rcu_bh); |
| 1955 | 1954 | ||
| 1955 | /* | ||
| 1956 | * Because a context switch is a grace period for RCU-sched and RCU-bh, | ||
| 1957 | * any blocking grace-period wait automatically implies a grace period | ||
| 1958 | * if there is only one CPU online at any point time during execution | ||
| 1959 | * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to | ||
| 1960 | * occasionally incorrectly indicate that there are multiple CPUs online | ||
| 1961 | * when there was in fact only one the whole time, as this just adds | ||
| 1962 | * some overhead: RCU still operates correctly. | ||
| 1963 | * | ||
| 1964 | * Of course, sampling num_online_cpus() with preemption enabled can | ||
| 1965 | * give erroneous results if there are concurrent CPU-hotplug operations. | ||
| 1966 | * For example, given a demonic sequence of preemptions in num_online_cpus() | ||
| 1967 | * and CPU-hotplug operations, there could be two or more CPUs online at | ||
| 1968 | * all times, but num_online_cpus() might well return one (or even zero). | ||
| 1969 | * | ||
| 1970 | * However, all such demonic sequences require at least one CPU-offline | ||
| 1971 | * operation. Furthermore, rcu_blocking_is_gp() giving the wrong answer | ||
| 1972 | * is only a problem if there is an RCU read-side critical section executing | ||
| 1973 | * throughout. But RCU-sched and RCU-bh read-side critical sections | ||
| 1974 | * disable either preemption or bh, which prevents a CPU from going offline. | ||
| 1975 | * Therefore, the only way that rcu_blocking_is_gp() can incorrectly return | ||
| 1976 | * that there is only one CPU when in fact there was more than one throughout | ||
| 1977 | * is when there were no RCU readers in the system. If there are no | ||
| 1978 | * RCU readers, the grace period by definition can be of zero length, | ||
| 1979 | * regardless of the number of online CPUs. | ||
| 1980 | */ | ||
| 1981 | static inline int rcu_blocking_is_gp(void) | ||
| 1982 | { | ||
| 1983 | might_sleep(); /* Check for RCU read-side critical section. */ | ||
| 1984 | return num_online_cpus() <= 1; | ||
| 1985 | } | ||
| 1986 | |||
| 1956 | /** | 1987 | /** |
| 1957 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. | 1988 | * synchronize_sched - wait until an rcu-sched grace period has elapsed. |
| 1958 | * | 1989 | * |
| @@ -2543,7 +2574,7 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) | |||
| 2543 | 2574 | ||
| 2544 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) | 2575 | for (i = NUM_RCU_LVLS - 1; i > 0; i--) |
| 2545 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 2576 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
| 2546 | rsp->levelspread[0] = RCU_FANOUT_LEAF; | 2577 | rsp->levelspread[0] = CONFIG_RCU_FANOUT_LEAF; |
| 2547 | } | 2578 | } |
| 2548 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 2579 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ |
| 2549 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 2580 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
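rcu_blocking_is_gp() is now defined here in rcutree.c; as the long comment explains, it lets the blocking grace-period primitives return immediately when at most one CPU is online, since RCU-sched and RCU-bh readers pin their CPU and the might_sleep() itself acts as a quiescent state. A sketch of the intended fast path, assuming the usual shape of synchronize_sched() (its body is not part of this hunk, and the slow-path call is an assumption):

```c
void synchronize_sched_sketch(void)
{
	if (rcu_blocking_is_gp())
		return;	/* Single CPU online: no reader can be running, so the
			 * grace period is trivially complete. */
	wait_rcu_gp(call_rcu_sched);	/* assumption: the normal callback-based slow path */
}
```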
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 1e49c5685960..7f5d138dedf5 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
| @@ -29,18 +29,14 @@ | |||
| 29 | #include <linux/seqlock.h> | 29 | #include <linux/seqlock.h> |
| 30 | 30 | ||
| 31 | /* | 31 | /* |
| 32 | * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. | 32 | * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and |
| 33 | * CONFIG_RCU_FANOUT_LEAF. | ||
| 33 | * In theory, it should be possible to add more levels straightforwardly. | 34 | * In theory, it should be possible to add more levels straightforwardly. |
| 34 | * In practice, this did work well going from three levels to four. | 35 | * In practice, this did work well going from three levels to four. |
| 35 | * Of course, your mileage may vary. | 36 | * Of course, your mileage may vary. |
| 36 | */ | 37 | */ |
| 37 | #define MAX_RCU_LVLS 4 | 38 | #define MAX_RCU_LVLS 4 |
| 38 | #if CONFIG_RCU_FANOUT > 16 | 39 | #define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) |
| 39 | #define RCU_FANOUT_LEAF 16 | ||
| 40 | #else /* #if CONFIG_RCU_FANOUT > 16 */ | ||
| 41 | #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) | ||
| 42 | #endif /* #else #if CONFIG_RCU_FANOUT > 16 */ | ||
| 43 | #define RCU_FANOUT_1 (RCU_FANOUT_LEAF) | ||
| 44 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) | 40 | #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) |
| 45 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) | 41 | #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) |
| 46 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) | 42 | #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) |
| @@ -434,7 +430,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); | |||
| 434 | /* Forward declarations for rcutree_plugin.h */ | 430 | /* Forward declarations for rcutree_plugin.h */ |
| 435 | static void rcu_bootup_announce(void); | 431 | static void rcu_bootup_announce(void); |
| 436 | long rcu_batches_completed(void); | 432 | long rcu_batches_completed(void); |
| 437 | static void rcu_preempt_note_context_switch(int cpu); | ||
| 438 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); | 433 | static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); |
| 439 | #ifdef CONFIG_HOTPLUG_CPU | 434 | #ifdef CONFIG_HOTPLUG_CPU |
| 440 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, | 435 | static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, |
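RCU_FANOUT_LEAF is gone: the leaf fanout now comes directly from the new CONFIG_RCU_FANOUT_LEAF Kconfig option instead of being CONFIG_RCU_FANOUT capped at 16, and rcu_init_levelspread() in rcutree.c picks it up for level 0. A worked example of the resulting tree capacity, assuming the common 64-bit defaults of CONFIG_RCU_FANOUT=64 and CONFIG_RCU_FANOUT_LEAF=16 (both values are assumptions, not part of this diff):

```c
#define CONFIG_RCU_FANOUT	64	/* assumed default for 64-bit builds */
#define CONFIG_RCU_FANOUT_LEAF	16	/* assumed default */

#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF)			/* one level:            16 CPUs */
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)		/* two levels:        1,024 CPUs */
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)		/* three levels:     65,536 CPUs */
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)		/* four levels:   4,194,304 CPUs */
```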
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 7082ea93566f..2411000d9869 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
| @@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu) | |||
| 153 | * | 153 | * |
| 154 | * Caller must disable preemption. | 154 | * Caller must disable preemption. |
| 155 | */ | 155 | */ |
| 156 | static void rcu_preempt_note_context_switch(int cpu) | 156 | void rcu_preempt_note_context_switch(void) |
| 157 | { | 157 | { |
| 158 | struct task_struct *t = current; | 158 | struct task_struct *t = current; |
| 159 | unsigned long flags; | 159 | unsigned long flags; |
| @@ -164,7 +164,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 164 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { | 164 | (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { |
| 165 | 165 | ||
| 166 | /* Possibly blocking in an RCU read-side critical section. */ | 166 | /* Possibly blocking in an RCU read-side critical section. */ |
| 167 | rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); | 167 | rdp = __this_cpu_ptr(rcu_preempt_state.rda); |
| 168 | rnp = rdp->mynode; | 168 | rnp = rdp->mynode; |
| 169 | raw_spin_lock_irqsave(&rnp->lock, flags); | 169 | raw_spin_lock_irqsave(&rnp->lock, flags); |
| 170 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; | 170 | t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; |
| @@ -228,7 +228,7 @@ static void rcu_preempt_note_context_switch(int cpu) | |||
| 228 | * means that we continue to block the current grace period. | 228 | * means that we continue to block the current grace period. |
| 229 | */ | 229 | */ |
| 230 | local_irq_save(flags); | 230 | local_irq_save(flags); |
| 231 | rcu_preempt_qs(cpu); | 231 | rcu_preempt_qs(smp_processor_id()); |
| 232 | local_irq_restore(flags); | 232 | local_irq_restore(flags); |
| 233 | } | 233 | } |
| 234 | 234 | ||
| @@ -969,22 +969,6 @@ static void __init __rcu_init_preempt(void) | |||
| 969 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); | 969 | rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); |
| 970 | } | 970 | } |
| 971 | 971 | ||
| 972 | /* | ||
| 973 | * Check for a task exiting while in a preemptible-RCU read-side | ||
| 974 | * critical section, clean up if so. No need to issue warnings, | ||
| 975 | * as debug_check_no_locks_held() already does this if lockdep | ||
| 976 | * is enabled. | ||
| 977 | */ | ||
| 978 | void exit_rcu(void) | ||
| 979 | { | ||
| 980 | struct task_struct *t = current; | ||
| 981 | |||
| 982 | if (t->rcu_read_lock_nesting == 0) | ||
| 983 | return; | ||
| 984 | t->rcu_read_lock_nesting = 1; | ||
| 985 | __rcu_read_unlock(); | ||
| 986 | } | ||
| 987 | |||
| 988 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | 972 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ |
| 989 | 973 | ||
| 990 | static struct rcu_state *rcu_state = &rcu_sched_state; | 974 | static struct rcu_state *rcu_state = &rcu_sched_state; |
| @@ -1018,14 +1002,6 @@ void rcu_force_quiescent_state(void) | |||
| 1018 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); | 1002 | EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); |
| 1019 | 1003 | ||
| 1020 | /* | 1004 | /* |
| 1021 | * Because preemptible RCU does not exist, we never have to check for | ||
| 1022 | * CPUs being in quiescent states. | ||
| 1023 | */ | ||
| 1024 | static void rcu_preempt_note_context_switch(int cpu) | ||
| 1025 | { | ||
| 1026 | } | ||
| 1027 | |||
| 1028 | /* | ||
| 1029 | * Because preemptible RCU does not exist, there are never any preempted | 1005 | * Because preemptible RCU does not exist, there are never any preempted |
| 1030 | * RCU readers. | 1006 | * RCU readers. |
| 1031 | */ | 1007 | */ |
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 4603b9d8f30a..5d89eb93f7e4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
| @@ -2083,6 +2083,7 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
| 2083 | #endif | 2083 | #endif |
| 2084 | 2084 | ||
| 2085 | /* Here we just switch the register state and the stack. */ | 2085 | /* Here we just switch the register state and the stack. */ |
| 2086 | rcu_switch_from(prev); | ||
| 2086 | switch_to(prev, next, prev); | 2087 | switch_to(prev, next, prev); |
| 2087 | 2088 | ||
| 2088 | barrier(); | 2089 | barrier(); |
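This one-line hook is what allows rcu_note_context_switch() in rcutree.c to stop calling rcu_preempt_note_context_switch() and lets the latter become a globally visible, argument-free function in rcutree_plugin.h: the preemptible-RCU bookkeeping now runs from the scheduler only when it is actually needed. rcu_switch_from() itself is defined outside this diff; a plausible minimal form, consistent with the new void-argument API (treat the body as an assumption):

```c
static inline void rcu_switch_from(struct task_struct *prev)
{
	/*
	 * Only a task inside an RCU read-side critical section needs to
	 * be queued as a blocked reader when it is switched out.
	 */
	if (prev->rcu_read_lock_nesting != 0)
		rcu_preempt_note_context_switch();
}
```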
diff --git a/kernel/srcu.c b/kernel/srcu.c
index ba35f3a4a1f4..2095be3318d5 100644
--- a/kernel/srcu.c
+++ b/kernel/srcu.c
| @@ -34,10 +34,77 @@ | |||
| 34 | #include <linux/delay.h> | 34 | #include <linux/delay.h> |
| 35 | #include <linux/srcu.h> | 35 | #include <linux/srcu.h> |
| 36 | 36 | ||
| 37 | /* | ||
| 38 | * Initialize an rcu_batch structure to empty. | ||
| 39 | */ | ||
| 40 | static inline void rcu_batch_init(struct rcu_batch *b) | ||
| 41 | { | ||
| 42 | b->head = NULL; | ||
| 43 | b->tail = &b->head; | ||
| 44 | } | ||
| 45 | |||
| 46 | /* | ||
| 47 | * Enqueue a callback onto the tail of the specified rcu_batch structure. | ||
| 48 | */ | ||
| 49 | static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head) | ||
| 50 | { | ||
| 51 | *b->tail = head; | ||
| 52 | b->tail = &head->next; | ||
| 53 | } | ||
| 54 | |||
| 55 | /* | ||
| 56 | * Is the specified rcu_batch structure empty? | ||
| 57 | */ | ||
| 58 | static inline bool rcu_batch_empty(struct rcu_batch *b) | ||
| 59 | { | ||
| 60 | return b->tail == &b->head; | ||
| 61 | } | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Remove the callback at the head of the specified rcu_batch structure | ||
| 65 | * and return a pointer to it, or return NULL if the structure is empty. | ||
| 66 | */ | ||
| 67 | static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b) | ||
| 68 | { | ||
| 69 | struct rcu_head *head; | ||
| 70 | |||
| 71 | if (rcu_batch_empty(b)) | ||
| 72 | return NULL; | ||
| 73 | |||
| 74 | head = b->head; | ||
| 75 | b->head = head->next; | ||
| 76 | if (b->tail == &head->next) | ||
| 77 | rcu_batch_init(b); | ||
| 78 | |||
| 79 | return head; | ||
| 80 | } | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Move all callbacks from the rcu_batch structure specified by "from" to | ||
| 84 | * the structure specified by "to". | ||
| 85 | */ | ||
| 86 | static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from) | ||
| 87 | { | ||
| 88 | if (!rcu_batch_empty(from)) { | ||
| 89 | *to->tail = from->head; | ||
| 90 | to->tail = from->tail; | ||
| 91 | rcu_batch_init(from); | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | /* single-thread state-machine */ | ||
| 96 | static void process_srcu(struct work_struct *work); | ||
| 97 | |||
| 37 | static int init_srcu_struct_fields(struct srcu_struct *sp) | 98 | static int init_srcu_struct_fields(struct srcu_struct *sp) |
| 38 | { | 99 | { |
| 39 | sp->completed = 0; | 100 | sp->completed = 0; |
| 40 | mutex_init(&sp->mutex); | 101 | spin_lock_init(&sp->queue_lock); |
| 102 | sp->running = false; | ||
| 103 | rcu_batch_init(&sp->batch_queue); | ||
| 104 | rcu_batch_init(&sp->batch_check0); | ||
| 105 | rcu_batch_init(&sp->batch_check1); | ||
| 106 | rcu_batch_init(&sp->batch_done); | ||
| 107 | INIT_DELAYED_WORK(&sp->work, process_srcu); | ||
| 41 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); | 108 | sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); |
| 42 | return sp->per_cpu_ref ? 0 : -ENOMEM; | 109 | return sp->per_cpu_ref ? 0 : -ENOMEM; |
| 43 | } | 110 | } |
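The rcu_batch helpers give SRCU an O(1) singly linked callback queue: the tail pointer means enqueue, dequeue, and splice never walk the list, and init_srcu_struct_fields() now sets up four such batches (batch_queue, batch_check0, batch_check1, batch_done) plus a delayed-work item for the new per-srcu_struct state machine. A small usage sketch of the tail-pointer idiom only, assuming the struct layouts declared in linux/srcu.h; the surrounding driver logic is not shown in this section:

```c
/* Sketch, not patch code: splice a pending batch and invoke its callbacks,
 * roughly what the later callback-invocation step of the state machine does. */
static void drain_batch_sketch(struct rcu_batch *pending, struct rcu_batch *done)
{
	struct rcu_head *head;

	rcu_batch_move(done, pending);			/* O(1) splice of the whole list */
	while ((head = rcu_batch_dequeue(done)) != NULL)
		head->func(head);			/* run the SRCU callback */
}
```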
| @@ -73,21 +140,116 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); | |||
| 73 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 140 | #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
| 74 | 141 | ||
| 75 | /* | 142 | /* |
| 76 | * srcu_readers_active_idx -- returns approximate number of readers | 143 | * Returns approximate total of the readers' ->seq[] values for the |
| 77 | * active on the specified rank of per-CPU counters. | 144 | * rank of per-CPU counters specified by idx. |
| 78 | */ | 145 | */ |
| 146 | static unsigned long srcu_readers_seq_idx(struct srcu_struct *sp, int idx) | ||
| 147 | { | ||
| 148 | int cpu; | ||
| 149 | unsigned long sum = 0; | ||
| 150 | unsigned long t; | ||
| 79 | 151 | ||
| 80 | static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | 152 | for_each_possible_cpu(cpu) { |
| 153 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->seq[idx]); | ||
| 154 | sum += t; | ||
| 155 | } | ||
| 156 | return sum; | ||
| 157 | } | ||
| 158 | |||
| 159 | /* | ||
| 160 | * Returns approximate number of readers active on the specified rank | ||
| 161 | * of the per-CPU ->c[] counters. | ||
| 162 | */ | ||
| 163 | static unsigned long srcu_readers_active_idx(struct srcu_struct *sp, int idx) | ||
| 81 | { | 164 | { |
| 82 | int cpu; | 165 | int cpu; |
| 83 | int sum; | 166 | unsigned long sum = 0; |
| 167 | unsigned long t; | ||
| 84 | 168 | ||
| 85 | sum = 0; | 169 | for_each_possible_cpu(cpu) { |
| 86 | for_each_possible_cpu(cpu) | 170 | t = ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]); |
| 87 | sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; | 171 | sum += t; |
| 172 | } | ||
| 88 | return sum; | 173 | return sum; |
| 89 | } | 174 | } |
| 90 | 175 | ||
| 176 | /* | ||
| 177 | * Return true if the number of pre-existing readers is determined to | ||
| 178 | * be stably zero. An example unstable zero can occur if the call | ||
| 179 | * to srcu_readers_active_idx() misses an __srcu_read_lock() increment, | ||
| 180 | * but due to task migration, sees the corresponding __srcu_read_unlock() | ||
| 181 | * decrement. This can happen because srcu_readers_active_idx() takes | ||
| 182 | * time to sum the array, and might in fact be interrupted or preempted | ||
| 183 | * partway through the summation. | ||
| 184 | */ | ||
| 185 | static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | ||
| 186 | { | ||
| 187 | unsigned long seq; | ||
| 188 | |||
| 189 | seq = srcu_readers_seq_idx(sp, idx); | ||
| 190 | |||
| 191 | /* | ||
| 192 | * The following smp_mb() A pairs with the smp_mb() B located in | ||
| 193 | * __srcu_read_lock(). This pairing ensures that if an | ||
| 194 | * __srcu_read_lock() increments its counter after the summation | ||
| 195 | * in srcu_readers_active_idx(), then the corresponding SRCU read-side | ||
| 196 | * critical section will see any changes made prior to the start | ||
| 197 | * of the current SRCU grace period. | ||
| 198 | * | ||
| 199 | * Also, if the above call to srcu_readers_seq_idx() saw the | ||
| 200 | * increment of ->seq[], then the call to srcu_readers_active_idx() | ||
| 201 | * must see the increment of ->c[]. | ||
| 202 | */ | ||
| 203 | smp_mb(); /* A */ | ||
| 204 | |||
| 205 | /* | ||
| 206 | * Note that srcu_readers_active_idx() can incorrectly return | ||
| 207 | * zero even though there is a pre-existing reader throughout. | ||
| 208 | * To see this, suppose that task A is in a very long SRCU | ||
| 209 | * read-side critical section that started on CPU 0, and that | ||
| 210 | * no other reader exists, so that the sum of the counters | ||
| 211 | * is equal to one. Then suppose that task B starts executing | ||
| 212 | * srcu_readers_active_idx(), summing up to CPU 1, and then that | ||
| 213 | * task C starts reading on CPU 0, so that its increment is not | ||
| 214 | * summed, but finishes reading on CPU 2, so that its decrement | ||
| 215 | * -is- summed. Then when task B completes its sum, it will | ||
| 216 | * incorrectly get zero, despite the fact that task A has been | ||
| 217 | * in its SRCU read-side critical section the whole time. | ||
| 218 | * | ||
| 219 | * We therefore do a validation step should srcu_readers_active_idx() | ||
| 220 | * return zero. | ||
| 221 | */ | ||
| 222 | if (srcu_readers_active_idx(sp, idx) != 0) | ||
| 223 | return false; | ||
| 224 | |||
| 225 | /* | ||
| 226 | * The remainder of this function is the validation step. | ||
| 227 | * The following smp_mb() D pairs with the smp_mb() C in | ||
| 228 | * __srcu_read_unlock(). If the __srcu_read_unlock() was seen | ||
| 229 | * by srcu_readers_active_idx() above, then any destructive | ||
| 230 | * operation performed after the grace period will happen after | ||
| 231 | * the corresponding SRCU read-side critical section. | ||
| 232 | * | ||
| 233 | * Note that there can be at most NR_CPUS worth of readers using | ||
| 234 | * the old index, which is not enough to overflow even a 32-bit | ||
| 235 | * integer. (Yes, this does mean that systems having more than | ||
| 236 | * a billion or so CPUs need to be 64-bit systems.) Therefore, | ||
| 237 | * the sum of the ->seq[] counters cannot possibly overflow. | ||
| 238 | * Therefore, the only way that the return values of the two | ||
| 239 | * calls to srcu_readers_seq_idx() can be equal is if there were | ||
| 240 | * no increments of the corresponding rank of ->seq[] counts | ||
| 241 | * in the interim. But the missed-increment scenario laid out | ||
| 242 | * above includes an increment of the ->seq[] counter by | ||
| 243 | * the corresponding __srcu_read_lock(). Therefore, if this | ||
| 244 | * scenario occurs, the return values from the two calls to | ||
| 245 | * srcu_readers_seq_idx() will differ, and thus the validation | ||
| 246 | * step below suffices. | ||
| 247 | */ | ||
| 248 | smp_mb(); /* D */ | ||
| 249 | |||
| 250 | return srcu_readers_seq_idx(sp, idx) == seq; | ||
| 251 | } | ||
| 252 | |||
| 91 | /** | 253 | /** |
| 92 | * srcu_readers_active - returns approximate number of readers. | 254 | * srcu_readers_active - returns approximate number of readers. |
| 93 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | 255 | * @sp: which srcu_struct to count active readers (holding srcu_read_lock). |
| @@ -98,7 +260,14 @@ static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) | |||
| 98 | */ | 260 | */ |
| 99 | static int srcu_readers_active(struct srcu_struct *sp) | 261 | static int srcu_readers_active(struct srcu_struct *sp) |
| 100 | { | 262 | { |
| 101 | return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); | 263 | int cpu; |
| 264 | unsigned long sum = 0; | ||
| 265 | |||
| 266 | for_each_possible_cpu(cpu) { | ||
| 267 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[0]); | ||
| 268 | sum += ACCESS_ONCE(per_cpu_ptr(sp->per_cpu_ref, cpu)->c[1]); | ||
| 269 | } | ||
| 270 | return sum; | ||
| 102 | } | 271 | } |
| 103 | 272 | ||
| 104 | /** | 273 | /** |
| @@ -131,10 +300,11 @@ int __srcu_read_lock(struct srcu_struct *sp) | |||
| 131 | int idx; | 300 | int idx; |
| 132 | 301 | ||
| 133 | preempt_disable(); | 302 | preempt_disable(); |
| 134 | idx = sp->completed & 0x1; | 303 | idx = rcu_dereference_index_check(sp->completed, |
| 135 | barrier(); /* ensure compiler looks -once- at sp->completed. */ | 304 | rcu_read_lock_sched_held()) & 0x1; |
| 136 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; | 305 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) += 1; |
| 137 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 306 | smp_mb(); /* B */ /* Avoid leaking the critical section. */ |
| 307 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->seq[idx]) += 1; | ||
| 138 | preempt_enable(); | 308 | preempt_enable(); |
| 139 | return idx; | 309 | return idx; |
| 140 | } | 310 | } |
| @@ -149,8 +319,8 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); | |||
| 149 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) | 319 | void __srcu_read_unlock(struct srcu_struct *sp, int idx) |
| 150 | { | 320 | { |
| 151 | preempt_disable(); | 321 | preempt_disable(); |
| 152 | srcu_barrier(); /* ensure compiler won't misorder critical section. */ | 322 | smp_mb(); /* C */ /* Avoid leaking the critical section. */ |
| 153 | per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; | 323 | ACCESS_ONCE(this_cpu_ptr(sp->per_cpu_ref)->c[idx]) -= 1; |
| 154 | preempt_enable(); | 324 | preempt_enable(); |
| 155 | } | 325 | } |
| 156 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); | 326 | EXPORT_SYMBOL_GPL(__srcu_read_unlock); |
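__srcu_read_lock() and __srcu_read_unlock() now use real smp_mb() barriers (B and C), which pair with barriers A and D in srcu_readers_active_idx_check() above, and the lock side also bumps a per-CPU ->seq[] counter so the grace-period code can detect a racing lock/unlock pair that migrated between CPUs. The next hunk adds call_srcu(), the first asynchronous SRCU interface; a minimal usage sketch with made-up structure and callback names (assumes linux/srcu.h and linux/slab.h, and an already-initialized srcu_struct):

```c
struct foo {
	struct rcu_head rh;
	int data;
};

static struct srcu_struct foo_srcu;	/* assume init_srcu_struct(&foo_srcu) ran earlier */

static void foo_reclaim(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rh));	/* free once readers are done */
}

static void foo_retire(struct foo *p)
{
	/* foo_reclaim() runs after all pre-existing foo_srcu readers finish. */
	call_srcu(&foo_srcu, &p->rh, foo_reclaim);
}
```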
| @@ -163,106 +333,119 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); | |||
| 163 | * we repeatedly block for 1-millisecond time periods. This approach | 333 | * we repeatedly block for 1-millisecond time periods. This approach |
| 164 | * has done well in testing, so there is no need for a config parameter. | 334 | * has done well in testing, so there is no need for a config parameter. |
| 165 | */ | 335 | */ |
| 166 | #define SYNCHRONIZE_SRCU_READER_DELAY 10 | 336 | #define SRCU_RETRY_CHECK_DELAY 5 |
| 337 | #define SYNCHRONIZE_SRCU_TRYCOUNT 2 | ||
| 338 | #define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12 | ||
| 167 | 339 | ||
| 168 | /* | 340 | /* |
| 169 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | 341 | * @@@ Wait until all pre-existing readers complete. Such readers |
| 342 | * will have used the index specified by "idx". | ||
| 343 | * the caller should ensures the ->completed is not changed while checking | ||
| 344 | * and idx = (->completed & 1) ^ 1 | ||
| 170 | */ | 345 | */ |
| 171 | static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | 346 | static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) |
| 172 | { | 347 | { |
| 173 | int idx; | 348 | for (;;) { |
| 174 | 349 | if (srcu_readers_active_idx_check(sp, idx)) | |
| 175 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && | 350 | return true; |
| 176 | !lock_is_held(&rcu_bh_lock_map) && | 351 | if (--trycount <= 0) |
| 177 | !lock_is_held(&rcu_lock_map) && | 352 | return false; |
| 178 | !lock_is_held(&rcu_sched_lock_map), | 353 | udelay(SRCU_RETRY_CHECK_DELAY); |
| 179 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | 354 | } |
| 180 | 355 | } | |
| 181 | idx = sp->completed; | ||
| 182 | mutex_lock(&sp->mutex); | ||
| 183 | 356 | ||
| 184 | /* | 357 | /* |
| 185 | * Check to see if someone else did the work for us while we were | 358 | * Increment the ->completed counter so that future SRCU readers will |
| 186 | * waiting to acquire the lock. We need -two- advances of | 359 | * use the other rank of the ->c[] and ->seq[] arrays. This allows |
| 187 | * the counter, not just one. If there was but one, we might have | 360 | * us to wait for pre-existing readers in a starvation-free manner. |
| 188 | * shown up -after- our helper's first synchronize_sched(), thus | 361 | */ |
| 189 | * having failed to prevent CPU-reordering races with concurrent | 362 | static void srcu_flip(struct srcu_struct *sp) |
| 190 | * srcu_read_unlock()s on other CPUs (see comment below). So we | 363 | { |
| 191 | * either (1) wait for two or (2) supply the second ourselves. | 364 | sp->completed++; |
| 192 | */ | 365 | } |
| 193 | 366 | ||
| 194 | if ((sp->completed - idx) >= 2) { | 367 | /* |
| 195 | mutex_unlock(&sp->mutex); | 368 | * Enqueue an SRCU callback on the specified srcu_struct structure, |
| 196 | return; | 369 | * initiating grace-period processing if it is not already running. |
| 370 | */ | ||
| 371 | void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | ||
| 372 | void (*func)(struct rcu_head *head)) | ||
| 373 | { | ||
| 374 | unsigned long flags; | ||
| 375 | |||
| 376 | head->next = NULL; | ||
| 377 | head->func = func; | ||
| 378 | spin_lock_irqsave(&sp->queue_lock, flags); | ||
| 379 | rcu_batch_queue(&sp->batch_queue, head); | ||
| 380 | if (!sp->running) { | ||
| 381 | sp->running = true; | ||
| 382 | queue_delayed_work(system_nrt_wq, &sp->work, 0); | ||
| 197 | } | 383 | } |
| 384 | spin_unlock_irqrestore(&sp->queue_lock, flags); | ||
| 385 | } | ||
| 386 | EXPORT_SYMBOL_GPL(call_srcu); | ||
| 198 | 387 | ||
| 199 | sync_func(); /* Force memory barrier on all CPUs. */ | 388 | struct rcu_synchronize { |
| 389 | struct rcu_head head; | ||
| 390 | struct completion completion; | ||
| 391 | }; | ||
| 200 | 392 | ||
| 201 | /* | 393 | /* |
| 202 | * The preceding synchronize_sched() ensures that any CPU that | 394 | * Awaken the corresponding synchronize_srcu() instance now that a |
| 203 | * sees the new value of sp->completed will also see any preceding | 395 | * grace period has elapsed. |
| 204 | * changes to data structures made by this CPU. This prevents | 396 | */ |
| 205 | * some other CPU from reordering the accesses in its SRCU | 397 | static void wakeme_after_rcu(struct rcu_head *head) |
| 206 | * read-side critical section to precede the corresponding | 398 | { |
| 207 | * srcu_read_lock() -- ensuring that such references will in | 399 | struct rcu_synchronize *rcu; |
| 208 | * fact be protected. | ||
| 209 | * | ||
| 210 | * So it is now safe to do the flip. | ||
| 211 | */ | ||
| 212 | 400 | ||
| 213 | idx = sp->completed & 0x1; | 401 | rcu = container_of(head, struct rcu_synchronize, head); |
| 214 | sp->completed++; | 402 | complete(&rcu->completion); |
| 403 | } | ||
| 215 | 404 | ||
| 216 | sync_func(); /* Force memory barrier on all CPUs. */ | 405 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); |
| 406 | static void srcu_reschedule(struct srcu_struct *sp); | ||
| 217 | 407 | ||
| 218 | /* | 408 | /* |
| 219 | * At this point, because of the preceding synchronize_sched(), | 409 | * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). |
| 220 | * all srcu_read_lock() calls using the old counters have completed. | 410 | */ |
| 221 | * Their corresponding critical sections might well be still | 411 | static void __synchronize_srcu(struct srcu_struct *sp, int trycount) |
| 222 | * executing, but the srcu_read_lock() primitives themselves | 412 | { |
| 223 | * will have finished executing. We initially give readers | 413 | struct rcu_synchronize rcu; |
| 224 | * an arbitrarily chosen 10 microseconds to get out of their | 414 | struct rcu_head *head = &rcu.head; |
| 225 | * SRCU read-side critical sections, then loop waiting 1/HZ | 415 | bool done = false; |
| 226 | * seconds per iteration. The 10-microsecond value has done | ||
| 227 | * very well in testing. | ||
| 228 | */ | ||
| 229 | |||
| 230 | if (srcu_readers_active_idx(sp, idx)) | ||
| 231 | udelay(SYNCHRONIZE_SRCU_READER_DELAY); | ||
| 232 | while (srcu_readers_active_idx(sp, idx)) | ||
| 233 | schedule_timeout_interruptible(1); | ||
| 234 | 416 | ||
| 235 | sync_func(); /* Force memory barrier on all CPUs. */ | 417 | rcu_lockdep_assert(!lock_is_held(&sp->dep_map) && |
| 418 | !lock_is_held(&rcu_bh_lock_map) && | ||
| 419 | !lock_is_held(&rcu_lock_map) && | ||
| 420 | !lock_is_held(&rcu_sched_lock_map), | ||
| 421 | "Illegal synchronize_srcu() in same-type SRCU (or RCU) read-side critical section"); | ||
| 236 | 422 | ||
| 237 | /* | 423 | init_completion(&rcu.completion); |
| 238 | * The preceding synchronize_sched() forces all srcu_read_unlock() | 424 | |
| 239 | * primitives that were executing concurrently with the preceding | 425 | head->next = NULL; |
| 240 | * for_each_possible_cpu() loop to have completed by this point. | 426 | head->func = wakeme_after_rcu; |
| 241 | * More importantly, it also forces the corresponding SRCU read-side | 427 | spin_lock_irq(&sp->queue_lock); |
| 242 | * critical sections to have also completed, and the corresponding | 428 | if (!sp->running) { |
| 243 | * references to SRCU-protected data items to be dropped. | 429 | /* steal the processing owner */ |
| 244 | * | 430 | sp->running = true; |
| 245 | * Note: | 431 | rcu_batch_queue(&sp->batch_check0, head); |
| 246 | * | 432 | spin_unlock_irq(&sp->queue_lock); |
| 247 | * Despite what you might think at first glance, the | 433 | |
| 248 | * preceding synchronize_sched() -must- be within the | 434 | srcu_advance_batches(sp, trycount); |
| 249 | * critical section ended by the following mutex_unlock(). | 435 | if (!rcu_batch_empty(&sp->batch_done)) { |
| 250 | * Otherwise, a task taking the early exit can race | 436 | BUG_ON(sp->batch_done.head != head); |
| 251 | * with a srcu_read_unlock(), which might have executed | 437 | rcu_batch_dequeue(&sp->batch_done); |
| 252 | * just before the preceding srcu_readers_active() check, | 438 | done = true; |
| 253 | * and whose CPU might have reordered the srcu_read_unlock() | 439 | } |
| 254 | * with the preceding critical section. In this case, there | 440 | /* Hand grace-period processing back to the work_struct. */ |
| 255 | * is nothing preventing the synchronize_sched() task that is | 441 | srcu_reschedule(sp); |
| 256 | * taking the early exit from freeing a data structure that | 442 | } else { |
| 257 | * is still being referenced (out of order) by the task | 443 | rcu_batch_queue(&sp->batch_queue, head); |
| 258 | * doing the srcu_read_unlock(). | 444 | spin_unlock_irq(&sp->queue_lock); |
| 259 | * | 445 | } |
| 260 | * Alternatively, the comparison with "2" on the early exit | ||
| 261 | * could be changed to "3", but this increases synchronize_srcu() | ||
| 262 | * latency for bulk loads. So the current code is preferred. | ||
| 263 | */ | ||
| 264 | 446 | ||
| 265 | mutex_unlock(&sp->mutex); | 447 | if (!done) |
| 448 | wait_for_completion(&rcu.completion); | ||
| 266 | } | 449 | } |
| 267 | 450 | ||
| 268 | /** | 451 | /** |
| @@ -281,7 +464,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, void (*sync_func)(void)) | |||
| 281 | */ | 464 | */ |
| 282 | void synchronize_srcu(struct srcu_struct *sp) | 465 | void synchronize_srcu(struct srcu_struct *sp) |
| 283 | { | 466 | { |
| 284 | __synchronize_srcu(sp, synchronize_sched); | 467 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_TRYCOUNT); |
| 285 | } | 468 | } |
| 286 | EXPORT_SYMBOL_GPL(synchronize_srcu); | 469 | EXPORT_SYMBOL_GPL(synchronize_srcu); |
| 287 | 470 | ||
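Continuing the illustrative names from the reader sketch above, this is roughly how a blocking updater would sit on top of synchronize_srcu(); the mutex and the helper function are hypothetical as well:

#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/srcu.h>

static DEFINE_MUTEX(foo_update_mutex);

static void update_foo_a(int new_a)
{
	struct foo *newp, *oldp;

	newp = kmalloc(sizeof(*newp), GFP_KERNEL);
	if (!newp)
		return;
	newp->a = new_a;

	mutex_lock(&foo_update_mutex);
	oldp = rcu_dereference_protected(global_foo,
					 lockdep_is_held(&foo_update_mutex));
	rcu_assign_pointer(global_foo, newp);
	mutex_unlock(&foo_update_mutex);

	synchronize_srcu(&my_srcu);	/* wait for all pre-existing readers */
	kfree(oldp);			/* no reader can still be using oldp */
}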
| @@ -289,18 +472,11 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
| 289 | * synchronize_srcu_expedited - Brute-force SRCU grace period | 472 | * synchronize_srcu_expedited - Brute-force SRCU grace period |
| 290 | * @sp: srcu_struct with which to synchronize. | 473 | * @sp: srcu_struct with which to synchronize. |
| 291 | * | 474 | * |
| 292 | * Wait for an SRCU grace period to elapse, but use a "big hammer" | 475 | * Wait for an SRCU grace period to elapse, but be more aggressive about |
| 293 | * approach to force the grace period to end quickly. This consumes | 476 | * spinning rather than blocking when waiting. |
| 294 | * significant time on all CPUs and is unfriendly to real-time workloads, | ||
| 295 | * so is thus not recommended for any sort of common-case code. In fact, | ||
| 296 | * if you are using synchronize_srcu_expedited() in a loop, please | ||
| 297 | * restructure your code to batch your updates, and then use a single | ||
| 298 | * synchronize_srcu() instead. | ||
| 299 | * | 477 | * |
| 300 | * Note that it is illegal to call this function while holding any lock | 478 | * Note that it is illegal to call this function while holding any lock |
| 301 | * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal | 479 | * that is acquired by a CPU-hotplug notifier. It is also illegal to call |
| 302 | * to call this function from a CPU-hotplug notifier. Failing to observe | ||
| 303 | * these restriction will result in deadlock. It is also illegal to call | ||
| 304 | * synchronize_srcu_expedited() from the corresponding SRCU read-side | 480 | * synchronize_srcu_expedited() from the corresponding SRCU read-side |
| 305 | * critical section; doing so will result in deadlock. However, it is | 481 | * critical section; doing so will result in deadlock. However, it is |
| 306 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct | 482 | * perfectly legal to call synchronize_srcu_expedited() on one srcu_struct |
| @@ -309,20 +485,166 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); | |||
| 309 | */ | 485 | */ |
| 310 | void synchronize_srcu_expedited(struct srcu_struct *sp) | 486 | void synchronize_srcu_expedited(struct srcu_struct *sp) |
| 311 | { | 487 | { |
| 312 | __synchronize_srcu(sp, synchronize_sched_expedited); | 488 | __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT); |
| 313 | } | 489 | } |
| 314 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | 490 | EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); |
| 315 | 491 | ||
| 316 | /** | 492 | /** |
| 493 | * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | ||
| 494 | */ | ||
| 495 | void srcu_barrier(struct srcu_struct *sp) | ||
| 496 | { | ||
| 497 | synchronize_srcu(sp); | ||
| 498 | } | ||
| 499 | EXPORT_SYMBOL_GPL(srcu_barrier); | ||
| 500 | |||
| 501 | /** | ||
| 317 | * srcu_batches_completed - return batches completed. | 502 | * srcu_batches_completed - return batches completed. |
| 318 | * @sp: srcu_struct on which to report batch completion. | 503 | * @sp: srcu_struct on which to report batch completion. |
| 319 | * | 504 | * |
| 320 | * Report the number of batches, correlated with, but not necessarily | 505 | * Report the number of batches, correlated with, but not necessarily |
| 321 | * precisely the same as, the number of grace periods that have elapsed. | 506 | * precisely the same as, the number of grace periods that have elapsed. |
| 322 | */ | 507 | */ |
| 323 | |||
| 324 | long srcu_batches_completed(struct srcu_struct *sp) | 508 | long srcu_batches_completed(struct srcu_struct *sp) |
| 325 | { | 509 | { |
| 326 | return sp->completed; | 510 | return sp->completed; |
| 327 | } | 511 | } |
| 328 | EXPORT_SYMBOL_GPL(srcu_batches_completed); | 512 | EXPORT_SYMBOL_GPL(srcu_batches_completed); |
| 513 | |||
| 514 | #define SRCU_CALLBACK_BATCH 10 | ||
| 515 | #define SRCU_INTERVAL 1 | ||
| 516 | |||
| 517 | /* | ||
| 518 | * Move any new SRCU callbacks to the first stage of the SRCU grace | ||
| 519 | * period pipeline. | ||
| 520 | */ | ||
| 521 | static void srcu_collect_new(struct srcu_struct *sp) | ||
| 522 | { | ||
| 523 | if (!rcu_batch_empty(&sp->batch_queue)) { | ||
| 524 | spin_lock_irq(&sp->queue_lock); | ||
| 525 | rcu_batch_move(&sp->batch_check0, &sp->batch_queue); | ||
| 526 | spin_unlock_irq(&sp->queue_lock); | ||
| 527 | } | ||
| 528 | } | ||
| 529 | |||
| 530 | /* | ||
| 531 | * Core SRCU state machine. Advance callbacks from ->batch_check0 to | ||
| 532 | * ->batch_check1 and then to ->batch_done as readers drain. | ||
| 533 | */ | ||
| 534 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount) | ||
| 535 | { | ||
| 536 | int idx = 1 ^ (sp->completed & 1); | ||
| 537 | |||
| 538 | /* | ||
| 539 | * Because readers might be delayed for an extended period after | ||
| 540 | * fetching ->completed for their index, at any point in time there | ||
| 541 | * might well be readers using both idx=0 and idx=1. We therefore | ||
| 542 | * need to wait for readers to clear from both index values before | ||
| 543 | * invoking a callback. | ||
| 544 | */ | ||
| 545 | |||
| 546 | if (rcu_batch_empty(&sp->batch_check0) && | ||
| 547 | rcu_batch_empty(&sp->batch_check1)) | ||
| 548 | return; /* no callbacks need to be advanced */ | ||
| 549 | |||
| 550 | if (!try_check_zero(sp, idx, trycount)) | ||
| 551 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
| 552 | |||
| 553 | /* | ||
| 554 | * The callbacks in ->batch_check1 have already been through their | ||
| 555 | * first zero check and the subsequent flip, back when they sat on | ||
| 556 | * ->batch_check0 in a previous invocation of srcu_advance_batches(). | ||
| 557 | * (Presumably try_check_zero() returned false during that | ||
| 558 | * invocation, leaving the callbacks stranded on ->batch_check1.) | ||
| 559 | * They are therefore ready to invoke, so move them to ->batch_done. | ||
| 560 | */ | ||
| 561 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
| 562 | |||
| 563 | if (rcu_batch_empty(&sp->batch_check0)) | ||
| 564 | return; /* no callbacks need to be advanced */ | ||
| 565 | srcu_flip(sp); | ||
| 566 | |||
| 567 | /* | ||
| 568 | * The callbacks in ->batch_check0 have just finished their | ||
| 569 | * first zero check and flip, so move them to ->batch_check1 | ||
| 570 | * for a future check against the other idx. | ||
| 571 | */ | ||
| 572 | rcu_batch_move(&sp->batch_check1, &sp->batch_check0); | ||
| 573 | |||
| 574 | /* | ||
| 575 | * SRCU read-side critical sections are normally short, so check | ||
| 576 | * at least twice in quick succession after a flip. | ||
| 577 | */ | ||
| 578 | trycount = trycount < 2 ? 2 : trycount; | ||
| 579 | if (!try_check_zero(sp, idx^1, trycount)) | ||
| 580 | return; /* failed to advance, will try after SRCU_INTERVAL */ | ||
| 581 | |||
| 582 | /* | ||
| 583 | * The callbacks in ->batch_check1 have now waited for all | ||
| 584 | * pre-existing readers using both idx values. They are therefore | ||
| 585 | * ready to invoke, so move them to ->batch_done. | ||
| 586 | */ | ||
| 587 | rcu_batch_move(&sp->batch_done, &sp->batch_check1); | ||
| 588 | } | ||
| 589 | |||
| 590 | /* | ||
| 591 | * Invoke a limited number of SRCU callbacks that have passed through | ||
| 592 | * their grace period. If there are more to do, SRCU will reschedule | ||
| 593 | * the workqueue. | ||
| 594 | */ | ||
| 595 | static void srcu_invoke_callbacks(struct srcu_struct *sp) | ||
| 596 | { | ||
| 597 | int i; | ||
| 598 | struct rcu_head *head; | ||
| 599 | |||
| 600 | for (i = 0; i < SRCU_CALLBACK_BATCH; i++) { | ||
| 601 | head = rcu_batch_dequeue(&sp->batch_done); | ||
| 602 | if (!head) | ||
| 603 | break; | ||
| 604 | local_bh_disable(); | ||
| 605 | head->func(head); | ||
| 606 | local_bh_enable(); | ||
| 607 | } | ||
| 608 | } | ||
| 609 | |||
| 610 | /* | ||
| 611 | * Finished one round of SRCU grace-period processing. Start another if | ||
| 612 | * more SRCU callbacks are queued; otherwise put SRCU into the not-running state. | ||
| 613 | */ | ||
| 614 | static void srcu_reschedule(struct srcu_struct *sp) | ||
| 615 | { | ||
| 616 | bool pending = true; | ||
| 617 | |||
| 618 | if (rcu_batch_empty(&sp->batch_done) && | ||
| 619 | rcu_batch_empty(&sp->batch_check1) && | ||
| 620 | rcu_batch_empty(&sp->batch_check0) && | ||
| 621 | rcu_batch_empty(&sp->batch_queue)) { | ||
| 622 | spin_lock_irq(&sp->queue_lock); | ||
| 623 | if (rcu_batch_empty(&sp->batch_done) && | ||
| 624 | rcu_batch_empty(&sp->batch_check1) && | ||
| 625 | rcu_batch_empty(&sp->batch_check0) && | ||
| 626 | rcu_batch_empty(&sp->batch_queue)) { | ||
| 627 | sp->running = false; | ||
| 628 | pending = false; | ||
| 629 | } | ||
| 630 | spin_unlock_irq(&sp->queue_lock); | ||
| 631 | } | ||
| 632 | |||
| 633 | if (pending) | ||
| 634 | queue_delayed_work(system_nrt_wq, &sp->work, SRCU_INTERVAL); | ||
| 635 | } | ||
| 636 | |||
| 637 | /* | ||
| 638 | * This is the work-queue function that handles SRCU grace periods. | ||
| 639 | */ | ||
| 640 | static void process_srcu(struct work_struct *work) | ||
| 641 | { | ||
| 642 | struct srcu_struct *sp; | ||
| 643 | |||
| 644 | sp = container_of(work, struct srcu_struct, work.work); | ||
| 645 | |||
| 646 | srcu_collect_new(sp); | ||
| 647 | srcu_advance_batches(sp, 1); | ||
| 648 | srcu_invoke_callbacks(sp); | ||
| 649 | srcu_reschedule(sp); | ||
| 650 | } | ||
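The headline addition in this file is call_srcu(), so a rough sketch of how a client might use it together with srcu_barrier() at teardown may help. Here struct foo is a variant of the earlier illustrative structure, this time with an embedded rcu_head, and the remaining names are invented too:

#include <linux/slab.h>
#include <linux/srcu.h>

struct foo {
	int a;
	struct rcu_head rh;
};

static struct srcu_struct my_srcu;	/* initialized with init_srcu_struct() */

static void free_foo_cb(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rh));
}

static void retire_foo(struct foo *oldp)
{
	/* free_foo_cb() runs only after all pre-existing SRCU readers finish. */
	call_srcu(&my_srcu, &oldp->rh, free_foo_cb);
}

static void my_teardown(void)
{
	srcu_barrier(&my_srcu);		/* wait for outstanding call_srcu() callbacks */
	cleanup_srcu_struct(&my_srcu);
}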
diff --git a/kernel/timer.c b/kernel/timer.c index a297ffcf888e..837c552fe838 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
| @@ -861,7 +861,13 @@ EXPORT_SYMBOL(mod_timer); | |||
| 861 | * | 861 | * |
| 862 | * mod_timer_pinned() is a way to update the expire field of an | 862 | * mod_timer_pinned() is a way to update the expire field of an |
| 863 | * active timer (if the timer is inactive it will be activated) | 863 | * active timer (if the timer is inactive it will be activated) |
| 864 | * and not allow the timer to be migrated to a different CPU. | 864 | * and to ensure that the timer is scheduled on the current CPU. |
| 865 | * | ||
| 866 | * Note that this does not prevent the timer from being migrated | ||
| 867 | * when the current CPU goes offline. If this is a problem for | ||
| 868 | * you, use CPU-hotplug notifiers to handle it correctly, for | ||
| 869 | * example, cancelling the timer when the corresponding CPU goes | ||
| 870 | * offline. | ||
| 865 | * | 871 | * |
| 866 | * mod_timer_pinned(timer, expires) is equivalent to: | 872 | * mod_timer_pinned(timer, expires) is equivalent to: |
| 867 | * | 873 | * |
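Given the clarified comment, a rough sketch of the recommended pattern: re-arm the timer from its own handler with mod_timer_pinned() so it stays on its CPU, and cancel it from a CPU-hotplug notifier before that CPU goes offline. Every identifier below is hypothetical:

#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/timer.h>

static DEFINE_PER_CPU(struct timer_list, my_timer);

static void my_timer_fn(unsigned long unused)
{
	/* ... per-CPU work ..., then re-arm on this same CPU. */
	mod_timer_pinned(this_cpu_ptr(&my_timer), jiffies + HZ);
}

static int my_cpu_notify(struct notifier_block *nb, unsigned long action,
			 void *hcpu)
{
	long cpu = (long)hcpu;

	if ((action & ~CPU_TASKS_FROZEN) == CPU_DOWN_PREPARE)
		del_timer_sync(&per_cpu(my_timer, cpu));	/* cancel before offline */
	return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
	.notifier_call = my_cpu_notify,
};
/* register_cpu_notifier(&my_cpu_nb) and setup_timer() would run at init time. */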
