author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2009-06-25 12:08:16 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-07-03 04:02:28 -0400
commit	03b042bf1dc14a268a3d65d38b4ec2a4261e8477 (patch)
tree	10e422a1396087d14cec164d8f4962d7ce8bcf2d
parent	c17ef45342cc033fdf7bdd5b28615e0090f8d2e7 (diff)
rcu: Add synchronize_sched_expedited() primitive
This adds the synchronize_sched_expedited() primitive that implements the
"big hammer" expedited RCU grace periods.

This primitive is placed in kernel/sched.c rather than kernel/rcupdate.c
due to its need to interact closely with the migration_thread() kthread.
The idea is to wake up this kthread with req->task set to NULL, in response
to which the kthread reports the quiescent state resulting from the kthread
having been scheduled.

Because this patch needs to fall back to the slow versions of these
primitives in response to some races with CPU onlining and offlining, a new
synchronize_rcu_bh() primitive is added as well.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: akpm@linux-foundation.org
Cc: torvalds@linux-foundation.org
Cc: davem@davemloft.net
Cc: dada1@cosmosbay.com
Cc: zbr@ioremap.net
Cc: jeff.chua.linux@gmail.com
Cc: paulus@samba.org
Cc: laijs@cn.fujitsu.com
Cc: jengelh@medozas.de
Cc: r000n@r000n.net
Cc: benh@kernel.crashing.org
Cc: mathieu.desnoyers@polymtl.ca
LKML-Reference: <12459460982947-git-send-email->
Signed-off-by: Ingo Molnar <mingo@elte.hu>
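For readers new to the expedited API, the intended usage is the same as
synchronize_sched(): an updater publishes a new version of RCU-sched-protected
data, waits for a grace period, then frees the old version, simply substituting
the new expedited primitive for the normal one. The sketch below is illustrative
only and is not part of this patch; struct foo, global_foo, and update_foo() are
hypothetical, and only synchronize_sched_expedited() comes from this commit.

/* Illustrative usage sketch -- not part of this commit. */
struct foo {
	int a;
};

/* Hypothetical pointer read under preempt_disable()/rcu_read_lock_sched(). */
static struct foo *global_foo;

void update_foo(struct foo *new_fp)
{
	struct foo *old_fp = global_foo;

	rcu_assign_pointer(global_foo, new_fp);
	/*
	 * Big-hammer grace period: returns quickly, but perturbs every
	 * online CPU, so reserve it for rare, latency-critical updates.
	 */
	synchronize_sched_expedited();
	kfree(old_fp);
}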
-rw-r--r--	include/linux/rcupdate.h	25
-rw-r--r--	include/linux/rcupreempt.h	10
-rw-r--r--	include/linux/rcutree.h	12
-rw-r--r--	kernel/rcupdate.c	25
-rw-r--r--	kernel/sched.c	129
5 files changed, 186 insertions(+), 15 deletions(-)
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 0cdfdb622faa..3c89d6a2591f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -51,7 +51,19 @@ struct rcu_head {
 	void (*func)(struct rcu_head *head);
 };
 
-/* Internal to kernel, but needed by rcupreempt.h. */
+/* Exported common interfaces */
+extern void synchronize_rcu(void);
+extern void synchronize_rcu_bh(void);
+extern void rcu_barrier(void);
+extern void rcu_barrier_bh(void);
+extern void rcu_barrier_sched(void);
+extern void synchronize_sched_expedited(void);
+extern int sched_expedited_torture_stats(char *page);
+
+/* Internal to kernel */
+extern void rcu_init(void);
+extern void rcu_scheduler_starting(void);
+extern int rcu_needs_cpu(int cpu);
 extern int rcu_scheduler_active;
 
 #if defined(CONFIG_TREE_RCU)
@@ -257,15 +269,4 @@ extern void call_rcu(struct rcu_head *head,
 extern void call_rcu_bh(struct rcu_head *head,
 			void (*func)(struct rcu_head *head));
 
-/* Exported common interfaces */
-extern void synchronize_rcu(void);
-extern void rcu_barrier(void);
-extern void rcu_barrier_bh(void);
-extern void rcu_barrier_sched(void);
-
-/* Internal to kernel */
-extern void rcu_init(void);
-extern void rcu_scheduler_starting(void);
-extern int rcu_needs_cpu(int cpu);
-
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index fce522782ffa..f164ac9b7807 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -74,6 +74,16 @@ extern int rcu_needs_cpu(int cpu);
 
 extern void __synchronize_sched(void);
 
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_rcu();  /* Placeholder for new rcupreempt implementation. */
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
+{
+	synchronize_rcu_bh();  /* Placeholder for new rcupreempt impl. */
+}
+
 extern void __rcu_init(void);
 extern void rcu_init_sched(void);
 extern void rcu_check_callbacks(int cpu, int user);
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 5a5153806c42..d4dfd2489633 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -286,8 +286,14 @@ static inline void __rcu_read_unlock_bh(void)
 
 #define call_rcu_sched(head, func) call_rcu(head, func)
 
-static inline void rcu_init_sched(void)
+static inline void synchronize_rcu_expedited(void)
+{
+	synchronize_sched_expedited();
+}
+
+static inline void synchronize_rcu_bh_expedited(void)
 {
+	synchronize_sched_expedited();
 }
 
 extern void __rcu_init(void);
@@ -297,6 +303,10 @@ extern void rcu_restart_cpu(int cpu);
 extern long rcu_batches_completed(void);
 extern long rcu_batches_completed_bh(void);
 
+static inline void rcu_init_sched(void)
+{
+}
+
 #ifdef CONFIG_NO_HZ
 void rcu_enter_nohz(void);
 void rcu_exit_nohz(void);
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index a967c9feb90a..eae29c25fb14 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -98,6 +98,30 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+/**
+ * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu_bh grace
+ * period has elapsed, in other words after all currently executing rcu_bh
+ * read-side critical sections have completed.  RCU read-side critical
+ * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
+ * and may be nested.
+ */
+void synchronize_rcu_bh(void)
+{
+	struct rcu_synchronize rcu;
+
+	if (rcu_blocking_is_gp())
+		return;
+
+	init_completion(&rcu.completion);
+	/* Will wake me after RCU finished. */
+	call_rcu_bh(&rcu.head, wakeme_after_rcu);
+	/* Wait for it. */
+	wait_for_completion(&rcu.completion);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
+
 static void rcu_barrier_callback(struct rcu_head *notused)
 {
 	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
@@ -129,6 +153,7 @@ static void rcu_barrier_func(void *type)
 static inline void wait_migrated_callbacks(void)
 {
 	wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count));
+	smp_mb(); /* In case we didn't sleep. */
 }
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index 7c9098d186e6..9ae80bec1c1e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7024,6 +7024,11 @@ fail:
 	return ret;
 }
 
+#define RCU_MIGRATION_IDLE	0
+#define RCU_MIGRATION_NEED_QS	1
+#define RCU_MIGRATION_GOT_QS	2
+#define RCU_MIGRATION_MUST_SYNC	3
+
 /*
  * migration_thread - this is a highprio system thread that performs
  * thread migration by bumping thread off CPU then 'pushing' onto
@@ -7031,6 +7036,7 @@ fail:
  */
 static int migration_thread(void *data)
 {
+	int badcpu;
 	int cpu = (long)data;
 	struct rq *rq;
 
@@ -7065,8 +7071,17 @@ static int migration_thread(void *data)
 		req = list_entry(head->next, struct migration_req, list);
 		list_del_init(head->next);
 
-		spin_unlock(&rq->lock);
-		__migrate_task(req->task, cpu, req->dest_cpu);
+		if (req->task != NULL) {
+			spin_unlock(&rq->lock);
+			__migrate_task(req->task, cpu, req->dest_cpu);
+		} else if (likely(cpu == (badcpu = smp_processor_id()))) {
+			req->dest_cpu = RCU_MIGRATION_GOT_QS;
+			spin_unlock(&rq->lock);
+		} else {
+			req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
+			spin_unlock(&rq->lock);
+			WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
+		}
 		local_irq_enable();
 
 		complete(&req->done);
@@ -10554,3 +10569,113 @@ struct cgroup_subsys cpuacct_subsys = {
 	.subsys_id = cpuacct_subsys_id,
 };
 #endif	/* CONFIG_CGROUP_CPUACCT */
+
+#ifndef CONFIG_SMP
+
+int rcu_expedited_torture_stats(char *page)
+{
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+void synchronize_sched_expedited(void)
+{
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
+static DEFINE_MUTEX(rcu_sched_expedited_mutex);
+
+#define RCU_EXPEDITED_STATE_POST	-2
+#define RCU_EXPEDITED_STATE_IDLE	-1
+
+static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+
+int rcu_expedited_torture_stats(char *page)
+{
+	int cnt = 0;
+	int cpu;
+
+	cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
+	for_each_online_cpu(cpu) {
+		cnt += sprintf(&page[cnt], " %d:%d",
+			       cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
+	}
+	cnt += sprintf(&page[cnt], "\n");
+	return cnt;
+}
+EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
+
+static long synchronize_sched_expedited_count;
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+ * approach to force grace period to end quickly.  This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier.  Failing to
+ * observe this restriction will result in deadlock.
+ */
+void synchronize_sched_expedited(void)
+{
+	int cpu;
+	unsigned long flags;
+	bool need_full_sync = 0;
+	struct rq *rq;
+	struct migration_req *req;
+	long snap;
+	int trycount = 0;
+
+	smp_mb();  /* ensure prior mod happens before capturing snap. */
+	snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+	get_online_cpus();
+	while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+		put_online_cpus();
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_sched();
+			return;
+		}
+		if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			return;
+		}
+		get_online_cpus();
+	}
+	rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
+	for_each_online_cpu(cpu) {
+		rq = cpu_rq(cpu);
+		req = &per_cpu(rcu_migration_req, cpu);
+		init_completion(&req->done);
+		req->task = NULL;
+		req->dest_cpu = RCU_MIGRATION_NEED_QS;
+		spin_lock_irqsave(&rq->lock, flags);
+		list_add(&req->list, &rq->migration_queue);
+		spin_unlock_irqrestore(&rq->lock, flags);
+		wake_up_process(rq->migration_thread);
+	}
+	for_each_online_cpu(cpu) {
+		rcu_expedited_state = cpu;
+		req = &per_cpu(rcu_migration_req, cpu);
+		rq = cpu_rq(cpu);
+		wait_for_completion(&req->done);
+		spin_lock_irqsave(&rq->lock, flags);
+		if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
+			need_full_sync = 1;
+		req->dest_cpu = RCU_MIGRATION_IDLE;
+		spin_unlock_irqrestore(&rq->lock, flags);
+	}
+	rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
+	mutex_unlock(&rcu_sched_expedited_mutex);
+	put_online_cpus();
+	if (need_full_sync)
+		synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */