author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2009-06-25 12:08:16 -0400
committer  Ingo Molnar <mingo@elte.hu>  2009-07-03 04:02:28 -0400
commit     03b042bf1dc14a268a3d65d38b4ec2a4261e8477 (patch)
tree       10e422a1396087d14cec164d8f4962d7ce8bcf2d /kernel
parent     c17ef45342cc033fdf7bdd5b28615e0090f8d2e7 (diff)
rcu: Add synchronize_sched_expedited() primitive
This adds the synchronize_sched_expedited() primitive that implements
the "big hammer" expedited RCU grace periods. This primitive is placed
in kernel/sched.c rather than kernel/rcupdate.c due to its need to
interact closely with the migration_thread() kthread. The idea is to
wake up this kthread with req->task set to NULL, in response to which
the kthread reports the quiescent state resulting from the kthread
having been scheduled. Because this patch needs to fall back to the
slow versions of the primitives in response to some races with CPU
onlining and offlining, a new synchronize_rcu_bh() primitive is added
as well.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Cc: davem@davemloft.net
Cc: dada1@cosmosbay.com
Cc: zbr@ioremap.net
Cc: jeff.chua.linux@gmail.com
Cc: paulus@samba.org
Cc: laijs@cn.fujitsu.com
Cc: jengelh@medozas.de
Cc: r000n@r000n.net
Cc: benh@kernel.crashing.org
Cc: mathieu.desnoyers@polymtl.ca
LKML-Reference: <12459460982947-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
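For context, a minimal caller-side sketch of when the expedited primitive is meant to be chosen over synchronize_sched(). This is not part of the patch; the helper name, the struct, and the latency_critical flag are hypothetical, only synchronize_sched_expedited() and synchronize_sched() come from this commit.

	/* Hypothetical caller, for illustration only. */
	static void my_retract_and_free(struct my_data *old, bool latency_critical)
	{
		/* ... unpublish 'old' so new rcu-sched readers cannot find it ... */
		if (latency_critical)
			synchronize_sched_expedited();	/* fast, but burdens all CPUs */
		else
			synchronize_sched();		/* slower, far cheaper system-wide */
		kfree(old);	/* no rcu-sched reader can still reference 'old' */
	}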
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcupdate.c   25
-rw-r--r--  kernel/sched.c     129
2 files changed, 152 insertions(+), 2 deletions(-)
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a967c9feb90a..eae29c25fb14 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -98,6 +98,30 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+/**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu_bh grace
+ * period has elapsed, in other words after all currently executing rcu_bh
+ * read-side critical sections have completed. RCU read-side critical
+ * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ * and may be nested.
+ */
+void synchronize_rcu_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
 static void rcu_barrier_callback(struct rcu_head *notused)
 {
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
@@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type)
 static inline void wait_migrated_callbacks(void)
 {
 	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+	smp_mb(); /* In case we didn't sleep. */
 }
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..9ae80bec1c1e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7024,6 +7024,11 @@ fail:
 	return ret;
 }
 
+#define RCU_MIGRATION_IDLE	0
+#define RCU_MIGRATION_NEED_QS	1
+#define RCU_MIGRATION_GOT_QS	2
+#define RCU_MIGRATION_MUST_SYNC	3
+
 /*
  * migration_thread - this is a highprio system thread that performs
  * thread migration by bumping thread off CPU then 'pushing' onto
@@ -7031,6 +7036,7 @@ fail:
  */
 static int migration_thread(void *data)
 {
+	int badcpu;
 	int cpu = (long)data;
 	struct rq *rq;
 
@@ -7065,8 +7071,17 @@ static int migration_thread(void *data)
 		req = list_entry(head->next, struct migration_req, list);
 		list_del_init(head->next);
 
-		spin_unlock(&rq->lock);
-		__migrate_task(req->task, cpu, req->dest_cpu);
+		if (req->task != NULL) {
+			spin_unlock(&rq->lock);
+			__migrate_task(req->task, cpu, req->dest_cpu);
+		} else if (likely(cpu == (badcpu = smp_processor_id()))) {
+			req->dest_cpu = RCU_MIGRATION_GOT_QS;
+			spin_unlock(&rq->lock);
+		} else {
+			req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
+			spin_unlock(&rq->lock);
+			WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
+		}
 		local_irq_enable();
 
 		complete(&req->done);
@@ -10554,3 +10569,113 @@ struct cgroup_subsys cpuacct_subsys = {
 	.subsys_id = cpuacct_subsys_id,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
+
+#ifndef CONFIG_SMP
+
+int rcu_expedited_torture_stats(char *page)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+void synchronize_sched_expedited(void)
+{
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
+static DEFINE_MUTEX(rcu_sched_expedited_mutex);
+
+#define RCU_EXPEDITED_STATE_POST	-2
+#define RCU_EXPEDITED_STATE_IDLE	-1
+
+static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+
+int rcu_expedited_torture_stats(char *page)
+{
+	int cnt = 0;
+	int cpu;
+
+	cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
+	for_each_online_cpu(cpu) {
+		cnt += sprintf(&page[cnt], " %d:%d",
+			       cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
+	}
+	cnt += sprintf(&page[cnt], "\n");
+	return cnt;
+}
+EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+static long synchronize_sched_expedited_count;
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+ * approach to force grace period to end quickly. This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier. Failing to
+ * observe this restriction will result in deadlock.
+ */
+void synchronize_sched_expedited(void)
+{
+	int cpu;
+	unsigned long flags;
+	bool need_full_sync = 0;
+	struct rq *rq;
+	struct migration_req *req;
+	long snap;
+	int trycount = 0;
+
+	smp_mb(); /* ensure prior mod happens before capturing snap. */
+	snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+	get_online_cpus();
+	while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+		put_online_cpus();
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_sched();
+			return;
+		}
+		if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			return;
+		}
+		get_online_cpus();
+	}
+	rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		req = &per_cpu(rcu_migration_req, cpu);
+		init_completion(&req->done);
+		req->task = NULL;
+		req->dest_cpu = RCU_MIGRATION_NEED_QS;
+		spin_lock_irqsave(&rq->lock, flags);
+		list_add(&req->list, &rq->migration_queue);
+		spin_unlock_irqrestore(&rq->lock, flags);
+		wake_up_process(rq->migration_thread);
+	}
+	for_each_online_cpu(cpu) {
+		rcu_expedited_state = cpu;
+		req = &per_cpu(rcu_migration_req, cpu);
+		rq = cpu_rq(cpu);
+		wait_for_completion(&req->done);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
+			need_full_sync = 1;
+		req->dest_cpu = RCU_MIGRATION_IDLE;
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
+	rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+	mutex_unlock(&rcu_sched_expedited_mutex);
+	put_online_cpus();
+	if (need_full_sync)
+		synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */
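Usage note on the restriction in the header comment above (an illustrative sketch under assumptions, not part of the patch): because synchronize_sched_expedited() calls get_online_cpus(), it must not be invoked while holding any lock that a CPU-hotplug notifier also acquires; complete the expedited grace period first, then take such a lock. The lock and function below are hypothetical.

	/* Hypothetical: my_hotplug_lock is assumed to also be acquired from a
	 * CPU-hotplug notifier, so it must not be held across the call. */
	static DEFINE_MUTEX(my_hotplug_lock);

	static void my_teardown(void)
	{
		synchronize_sched_expedited();	/* OK: no hotplug-notifier locks held here */
		mutex_lock(&my_hotplug_lock);
		/* ... tear down state that the notifier also manages ... */
		mutex_unlock(&my_hotplug_lock);
	}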