From bf66f18e79e34c421bbd8f6511e2c556b779df2f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 15:09:10 -0800 Subject: rcu: Add force_quiescent_state() testing to rcutorture Add force_quiescent_state() testing to rcutorture, with a separate thread that repeatedly invokes force_quiescent_state() in bursts. This can greatly increase the probability of encountering certain types of race conditions. Suggested-by: Josh Triplett Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1262646551116-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 37fbccdf41d5..f11ebd44b454 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -61,6 +61,15 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for preemptible RCU. + */ +void rcu_force_quiescent_state(void) +{ + force_quiescent_state(&rcu_preempt_state, 0); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Record a preemptable-RCU quiescent state for the specified CPU. Note * that this just means that the task currently running on the CPU is @@ -712,6 +721,16 @@ long rcu_batches_completed(void) } EXPORT_SYMBOL_GPL(rcu_batches_completed); +/* + * Force a quiescent state for RCU, which, because there is no preemptible + * RCU, becomes the same as rcu-sched. + */ +void rcu_force_quiescent_state(void) +{ + rcu_sched_force_quiescent_state(); +} +EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); + /* * Because preemptable RCU does not exist, we never have to check for * CPUs being in quiescent states. -- cgit v1.2.2 From cba8244a0f1c277b6b1e48ed6504fa434119e24d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Jan 2010 16:04:01 -0800 Subject: rcu: Add debug check for too many rcu_read_unlock() TREE_PREEMPT_RCU maintains an rcu_read_lock_nesting counter in the task structure, which happens to be a signed int. So this patch adds a check for this counter being negative at the end of __rcu_read_unlock(). This check is under CONFIG_PROVE_LOCKING, so can be thought of as being part of lockdep. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <12626498423064-git-send-email-> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index f11ebd44b454..e77cdf321e13 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -304,6 +304,9 @@ void __rcu_read_unlock(void) if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(ACCESS_ONCE(t->rcu_read_lock_nesting) < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); -- cgit v1.2.2 From 8bd93a2c5d4cab2ae17d06350daa7dbf546a4634 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:04:59 -0800 Subject: rcu: Accelerate grace period if last non-dynticked CPU Currently, rcu_needs_cpu() simply checks whether the current CPU has an outstanding RCU callback, which means that the last CPU to go into dyntick-idle mode might wait a few ticks for the relevant grace periods to complete. However, if all the other CPUs are in dyntick-idle mode, and if this CPU is in a quiescent state (which it is for RCU-bh and RCU-sched any time that we are considering going into dyntick-idle mode), then the grace period is instantly complete. This patch therefore repeatedly invokes the RCU grace-period machinery in order to force any needed grace periods to complete quickly. It does so a limited number of times in order to prevent starvation by an RCU callback function that might pass itself to call_rcu(). However, if any CPU other than the current one is not in dyntick-idle mode, fall back to simply checking (with fix to bug noted by Lai Jiangshan). Also, take advantage of last grace-period forcing, the opportunity to do so noted by Steve Rostedt. And apply simplified #ifdef condition suggested by Frederic Weisbecker. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-15-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index e77cdf321e13..a82566696b0b 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -906,3 +906,72 @@ static void __init __rcu_init_preempt(void) } #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ + +#if !defined(CONFIG_RCU_FAST_NO_HZ) + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + * + * Because we have preemptible RCU, just check whether this CPU needs + * any flavor of RCU. Do not chew up lots of CPU cycles with preemption + * disabled in a most-likely vain attempt to cause RCU not to need this CPU. + */ +int rcu_needs_cpu(int cpu) +{ + return rcu_needs_cpu_quick_check(cpu); +} + +#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ + +#define RCU_NEEDS_CPU_FLUSHES 5 + +/* + * Check to see if any future RCU-related work will need to be done + * by the current CPU, even if none need be done immediately, returning + * 1 if so. This function is part of the RCU implementation; it is -not- + * an exported member of the RCU API. + * + * Because we are not supporting preemptible RCU, attempt to accelerate + * any current grace periods so that RCU no longer needs this CPU, but + * only if all other CPUs are already in dynticks-idle mode. This will + * allow the CPU cores to be powered down immediately, as opposed to after + * waiting many milliseconds for grace periods to elapse. + */ +int rcu_needs_cpu(int cpu) +{ + int c = 1; + int i; + int thatcpu; + + /* Don't bother unless we are the last non-dyntick-idle CPU. */ + for_each_cpu_not(thatcpu, nohz_cpu_mask) + if (thatcpu != cpu) + return rcu_needs_cpu_quick_check(cpu); + + /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ + for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { + c = 0; + if (per_cpu(rcu_sched_data, cpu).nxtlist) { + rcu_sched_qs(cpu); + force_quiescent_state(&rcu_sched_state, 0); + __rcu_process_callbacks(&rcu_sched_state, + &per_cpu(rcu_sched_data, cpu)); + c = !!per_cpu(rcu_sched_data, cpu).nxtlist; + } + if (per_cpu(rcu_bh_data, cpu).nxtlist) { + rcu_bh_qs(cpu); + force_quiescent_state(&rcu_bh_state, 0); + __rcu_process_callbacks(&rcu_bh_state, + &per_cpu(rcu_bh_data, cpu)); + c = !!per_cpu(rcu_bh_data, cpu).nxtlist; + } + } + + /* If RCU callbacks are still pending, RCU still needs this CPU. */ + return c; +} + +#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.2 From 1304afb225288a2e250d6a7495462c28e5509cbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:02 -0800 Subject: rcu: Convert to raw_spinlocks The spinlocks in rcutree need to be real spinlocks in preempt-rt. Convert them to raw_spinlocks. Signed-off-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-18-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a82566696b0b..a8b2e834fd3a 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -111,7 +111,7 @@ static void rcu_preempt_note_context_switch(int cpu) /* Possibly blocking in an RCU read-side critical section. */ rdp = rcu_preempt_state.rda[cpu]; rnp = rdp->mynode; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; t->rcu_blocked_node = rnp; @@ -132,7 +132,7 @@ static void rcu_preempt_note_context_switch(int cpu) WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1; list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -189,7 +189,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) struct rcu_node *rnp_p; if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); return; /* Still need more quiescent states! */ } @@ -206,8 +206,8 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) /* Report up the rest of the hierarchy. */ mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ - spin_lock(&rnp_p->lock); /* irqs already disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_lock(&rnp_p->lock); /* irqs already disabled. */ rcu_report_qs_rnp(mask, &rcu_preempt_state, rnp_p, flags); } @@ -257,10 +257,10 @@ static void rcu_read_unlock_special(struct task_struct *t) */ for (;;) { rnp = t->rcu_blocked_node; - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ if (rnp == t->rcu_blocked_node) break; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } empty = !rcu_preempted_readers(rnp); empty_exp = !rcu_preempted_readers_exp(rnp); @@ -274,7 +274,7 @@ static void rcu_read_unlock_special(struct task_struct *t) * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. */ if (empty) - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); else rcu_report_unblock_qs_rnp(rnp, flags); @@ -324,12 +324,12 @@ static void rcu_print_task_stall(struct rcu_node *rnp) struct task_struct *t; if (rcu_preempted_readers(rnp)) { - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); phase = rnp->gpnum & 0x1; lp = &rnp->blocked_tasks[phase]; list_for_each_entry(t, lp, rcu_node_entry) printk(" P%d", t->pid); - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } } @@ -400,11 +400,11 @@ static int rcu_preempt_offline_tasks(struct rcu_state *rsp, lp_root = &rnp_root->blocked_tasks[i]; while (!list_empty(lp)) { tp = list_entry(lp->next, typeof(*tp), rcu_node_entry); - spin_lock(&rnp_root->lock); /* irqs already disabled */ + raw_spin_lock(&rnp_root->lock); /* irqs already disabled */ list_del(&tp->rcu_node_entry); tp->rcu_blocked_node = rnp_root; list_add(&tp->rcu_node_entry, lp_root); - spin_unlock(&rnp_root->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp_root->lock); /* irqs remain disabled */ } } return retval; @@ -528,7 +528,7 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) unsigned long flags; unsigned long mask; - spin_lock_irqsave(&rnp->lock, flags); + raw_spin_lock_irqsave(&rnp->lock, flags); for (;;) { if (!sync_rcu_preempt_exp_done(rnp)) break; @@ -537,12 +537,12 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp) break; } mask = rnp->grpmask; - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ rnp = rnp->parent; - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ rnp->expmask &= ~mask; } - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } /* @@ -557,11 +557,11 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp) { int must_wait; - spin_lock(&rnp->lock); /* irqs already disabled */ + raw_spin_lock(&rnp->lock); /* irqs already disabled */ list_splice_init(&rnp->blocked_tasks[0], &rnp->blocked_tasks[2]); list_splice_init(&rnp->blocked_tasks[1], &rnp->blocked_tasks[3]); must_wait = rcu_preempted_readers_exp(rnp); - spin_unlock(&rnp->lock); /* irqs remain disabled */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ if (!must_wait) rcu_report_exp_rnp(rsp, rnp); } @@ -606,13 +606,13 @@ void synchronize_rcu_expedited(void) /* force all RCU readers onto blocked_tasks[]. */ synchronize_sched_expedited(); - spin_lock_irqsave(&rsp->onofflock, flags); + raw_spin_lock_irqsave(&rsp->onofflock, flags); /* Initialize ->expmask for all non-leaf rcu_node structures. */ rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) { - spin_lock(&rnp->lock); /* irqs already disabled. */ + raw_spin_lock(&rnp->lock); /* irqs already disabled. */ rnp->expmask = rnp->qsmaskinit; - spin_unlock(&rnp->lock); /* irqs remain disabled. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ } /* Snapshot current state of ->blocked_tasks[] lists. */ @@ -621,7 +621,7 @@ void synchronize_rcu_expedited(void) if (NUM_RCU_NODES > 1) sync_rcu_preempt_exp_init(rsp, rcu_get_root(rsp)); - spin_unlock_irqrestore(&rsp->onofflock, flags); + raw_spin_unlock_irqrestore(&rsp->onofflock, flags); /* Wait for snapshotted ->blocked_tasks[] lists to drain. */ rnp = rcu_get_root(rsp); @@ -756,7 +756,7 @@ static int rcu_preempted_readers(struct rcu_node *rnp) /* Because preemptible RCU does not exist, no quieting of tasks. */ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) { - spin_unlock_irqrestore(&rnp->lock, flags); + raw_spin_unlock_irqrestore(&rnp->lock, flags); } #endif /* #ifdef CONFIG_HOTPLUG_CPU */ -- cgit v1.2.2 From 3acd9eb31c5f7eb97cb2009fa41472710fb4a10f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:03 -0800 Subject: rcu: Fix deadlock in TREE_PREEMPT_RCU CPU stall detection Under TREE_PREEMPT_RCU, print_other_cpu_stall() invokes rcu_print_task_stall() with the root rcu_node structure's ->lock held, and rcu_print_task_stall() acquires that same lock for self-deadlock. Fix this by removing the lock acquisition from rcu_print_task_stall(), and making all callers acquire the lock instead. Tested-by: John Kacur Tested-by: Thomas Gleixner Located-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-19-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a8b2e834fd3a..aecfe37e0117 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -318,18 +318,15 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); */ static void rcu_print_task_stall(struct rcu_node *rnp) { - unsigned long flags; struct list_head *lp; int phase; struct task_struct *t; if (rcu_preempted_readers(rnp)) { - raw_spin_lock_irqsave(&rnp->lock, flags); phase = rnp->gpnum & 0x1; lp = &rnp->blocked_tasks[phase]; list_for_each_entry(t, lp, rcu_node_entry) printk(" P%d", t->pid); - raw_spin_unlock_irqrestore(&rnp->lock, flags); } } -- cgit v1.2.2 From 1ed509a225008c9e8c0644fbd22168e09a7383a0 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 22 Feb 2010 17:05:05 -0800 Subject: rcu: Add RCU_CPU_STALL_VERBOSE to dump detailed per-task information When RCU detects a grace-period stall, it currently just prints out the PID of any tasks doing the stalling. This patch adds RCU_CPU_STALL_VERBOSE, which enables the more-verbose reporting from sched_show_task(). Suggested-by: Thomas Gleixner Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1266887105-1528-21-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index aecfe37e0117..3516de7091a1 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -312,6 +312,50 @@ EXPORT_SYMBOL_GPL(__rcu_read_unlock); #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +#ifdef CONFIG_RCU_CPU_STALL_VERBOSE + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period on the specified rcu_node structure. + */ +static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp) +{ + unsigned long flags; + struct list_head *lp; + int phase; + struct task_struct *t; + + if (rcu_preempted_readers(rnp)) { + raw_spin_lock_irqsave(&rnp->lock, flags); + phase = rnp->gpnum & 0x1; + lp = &rnp->blocked_tasks[phase]; + list_for_each_entry(t, lp, rcu_node_entry) + sched_show_task(t); + raw_spin_unlock_irqrestore(&rnp->lock, flags); + } +} + +/* + * Dump detailed information for all tasks blocking the current RCU + * grace period. + */ +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ + struct rcu_node *rnp = rcu_get_root(rsp); + + rcu_print_detail_task_stall_rnp(rnp); + rcu_for_each_leaf_node(rsp, rnp) + rcu_print_detail_task_stall_rnp(rnp); +} + +#else /* #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ + +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ +} + +#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_VERBOSE */ + /* * Scan the current list of tasks blocked within RCU read-side critical * sections, printing out the tid of each. @@ -760,6 +804,14 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +/* + * Because preemptable RCU does not exist, we never have to check for + * tasks blocked within RCU read-side critical sections. + */ +static void rcu_print_detail_task_stall(struct rcu_state *rsp) +{ +} + /* * Because preemptable RCU does not exist, we never have to check for * tasks blocked within RCU read-side critical sections. -- cgit v1.2.2 From a47cd880b50e14b0b6f5e9d426ae9a2676c9c474 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 Feb 2010 16:38:56 -0800 Subject: rcu: Fix accelerated grace periods for last non-dynticked CPU It is invalid to invoke __rcu_process_callbacks() with irqs disabled, so do it indirectly via raise_softirq(). This requires a state-machine implementation to cycle through the grace-period machinery the required number of times. Located-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267231138-27856-1-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 73 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 20 deletions(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3516de7091a1..ed241fc478f0 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -973,9 +973,19 @@ int rcu_needs_cpu(int cpu) return rcu_needs_cpu_quick_check(cpu); } +/* + * Check to see if we need to continue a callback-flush operations to + * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle + * entry is not configured, so we never do need to. + */ +static void rcu_needs_cpu_flush(void) +{ +} + #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #define RCU_NEEDS_CPU_FLUSHES 5 +static DEFINE_PER_CPU(int, rcu_dyntick_drain); /* * Check to see if any future RCU-related work will need to be done @@ -988,39 +998,62 @@ int rcu_needs_cpu(int cpu) * only if all other CPUs are already in dynticks-idle mode. This will * allow the CPU cores to be powered down immediately, as opposed to after * waiting many milliseconds for grace periods to elapse. + * + * Because it is not legal to invoke rcu_process_callbacks() with irqs + * disabled, we do one pass of force_quiescent_state(), then do a + * raise_softirq() to cause rcu_process_callbacks() to be invoked later. + * The per-cpu rcu_dyntick_drain variable controls the sequencing. */ int rcu_needs_cpu(int cpu) { - int c = 1; - int i; + int c = 0; int thatcpu; /* Don't bother unless we are the last non-dyntick-idle CPU. */ for_each_cpu_not(thatcpu, nohz_cpu_mask) - if (thatcpu != cpu) + if (thatcpu != cpu) { + per_cpu(rcu_dyntick_drain, cpu) = 0; return rcu_needs_cpu_quick_check(cpu); - - /* Try to push remaining RCU-sched and RCU-bh callbacks through. */ - for (i = 0; i < RCU_NEEDS_CPU_FLUSHES && c; i++) { - c = 0; - if (per_cpu(rcu_sched_data, cpu).nxtlist) { - rcu_sched_qs(cpu); - force_quiescent_state(&rcu_sched_state, 0); - __rcu_process_callbacks(&rcu_sched_state, - &per_cpu(rcu_sched_data, cpu)); - c = !!per_cpu(rcu_sched_data, cpu).nxtlist; - } - if (per_cpu(rcu_bh_data, cpu).nxtlist) { - rcu_bh_qs(cpu); - force_quiescent_state(&rcu_bh_state, 0); - __rcu_process_callbacks(&rcu_bh_state, - &per_cpu(rcu_bh_data, cpu)); - c = !!per_cpu(rcu_bh_data, cpu).nxtlist; } + + /* Check and update the rcu_dyntick_drain sequencing. */ + if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* First time through, initialize the counter. */ + per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; + } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { + /* We have hit the limit, so time to give up. */ + return rcu_needs_cpu_quick_check(cpu); + } + + /* Do one step pushing remaining RCU callbacks through. */ + if (per_cpu(rcu_sched_data, cpu).nxtlist) { + rcu_sched_qs(cpu); + force_quiescent_state(&rcu_sched_state, 0); + c = c || per_cpu(rcu_sched_data, cpu).nxtlist; + } + if (per_cpu(rcu_bh_data, cpu).nxtlist) { + rcu_bh_qs(cpu); + force_quiescent_state(&rcu_bh_state, 0); + c = c || per_cpu(rcu_bh_data, cpu).nxtlist; } /* If RCU callbacks are still pending, RCU still needs this CPU. */ + if (c) + raise_softirq(RCU_SOFTIRQ); return c; } +/* + * Check to see if we need to continue a callback-flush operations to + * allow the last CPU to enter dyntick-idle mode. + */ +static void rcu_needs_cpu_flush(void) +{ + int cpu = smp_processor_id(); + + if (per_cpu(rcu_dyntick_drain, cpu) <= 0) + return; + (void)rcu_needs_cpu(cpu); +} + #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.2 From 71da81324c83ef65bb196c7f874ac1c6996d8287 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 26 Feb 2010 16:38:58 -0800 Subject: rcu: Fix accelerated GPs for last non-dynticked CPU This patch disables irqs across the call to rcu_needs_cpu(). It also enforces a hold-off period so that the idle loop doesn't softirq itself to death when there are lots of RCU callbacks in flight on the last non-dynticked CPU. Signed-off-by: Paul E. McKenney Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com LKML-Reference: <1267231138-27856-3-git-send-email-paulmck@linux.vnet.ibm.com> Signed-off-by: Ingo Molnar --- kernel/rcutree_plugin.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/rcutree_plugin.h') diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index ed241fc478f0..464ad2cdee00 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -986,6 +986,7 @@ static void rcu_needs_cpu_flush(void) #define RCU_NEEDS_CPU_FLUSHES 5 static DEFINE_PER_CPU(int, rcu_dyntick_drain); +static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); /* * Check to see if any future RCU-related work will need to be done @@ -1013,6 +1014,7 @@ int rcu_needs_cpu(int cpu) for_each_cpu_not(thatcpu, nohz_cpu_mask) if (thatcpu != cpu) { per_cpu(rcu_dyntick_drain, cpu) = 0; + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); } @@ -1022,6 +1024,7 @@ int rcu_needs_cpu(int cpu) per_cpu(rcu_dyntick_drain, cpu) = RCU_NEEDS_CPU_FLUSHES; } else if (--per_cpu(rcu_dyntick_drain, cpu) <= 0) { /* We have hit the limit, so time to give up. */ + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; return rcu_needs_cpu_quick_check(cpu); } @@ -1038,8 +1041,10 @@ int rcu_needs_cpu(int cpu) } /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) + if (c) { raise_softirq(RCU_SOFTIRQ); + per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; + } return c; } @@ -1050,10 +1055,13 @@ int rcu_needs_cpu(int cpu) static void rcu_needs_cpu_flush(void) { int cpu = smp_processor_id(); + unsigned long flags; if (per_cpu(rcu_dyntick_drain, cpu) <= 0) return; + local_irq_save(flags); (void)rcu_needs_cpu(cpu); + local_irq_restore(flags); } #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ -- cgit v1.2.2