author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2010-10-25 10:39:22 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>   2010-11-30 01:02:00 -0500
commit     db3a8920995484e5e9a0abaf3bad2c7311b163db
tree       e183de92a908466dd5568df151507b2331ff29a5   /kernel/rcutree_plugin.h
parent     2d999e03b7c8305b4385dd20992e4ed3e827177b
rcu: fix race condition in synchronize_sched_expedited()
The new (early 2010) implementation of synchronize_sched_expedited() uses
try_stop_cpus() to force a context switch on every CPU.  It also permits
concurrent calls to synchronize_sched_expedited() to share a single call
to try_stop_cpus() through use of an atomically incremented
synchronize_sched_expedited_count variable.  Unfortunately, this is
subject to failure as follows:

o	Task A invokes synchronize_sched_expedited(), try_stop_cpus()
	succeeds, but Task A is preempted before getting to the atomic
	increment of synchronize_sched_expedited_count.

o	Task B also invokes synchronize_sched_expedited(), with exactly
	the same outcome as Task A.

o	Task C also invokes synchronize_sched_expedited(), again with
	exactly the same outcome as Tasks A and B.

o	Task D also invokes synchronize_sched_expedited(), but only gets
	as far as acquiring the mutex within try_stop_cpus() before
	being preempted, interrupted, or otherwise delayed.

o	Task E also invokes synchronize_sched_expedited(), but only gets
	to the snapshotting of synchronize_sched_expedited_count.

o	Tasks A, B, and C all increment synchronize_sched_expedited_count.

o	Task E fails to get the mutex, so checks the new value of
	synchronize_sched_expedited_count.  It finds that the value has
	increased, so (wrongly) assumes that its work has been done,
	returning despite there having been no expedited grace period
	since it began.

The solution is to have the lowest-numbered CPU atomically increment the
synchronize_sched_expedited_count variable within the
synchronize_sched_expedited_cpu_stop() function, which is under the
protection of the mutex acquired by try_stop_cpus().  However, this also
requires that piggybacking tasks wait for three rather than two instances
of try_stop_cpus(), because we cannot control the order in which the
per-CPU callback functions occur.

Cc: Tejun Heo <tj@kernel.org>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
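To make the counting scheme concrete, here is a minimal user-space sketch of
the same idea.  It is not the kernel code: the names (expedited_count,
fake_stop_cpus(), fake_sync_expedited()) are hypothetical, a pthread mutex
stands in for the one taken inside try_stop_cpus(), and the retry/backoff
logic is omitted.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

static atomic_int expedited_count;	/* analogue of synchronize_sched_expedited_count */
static pthread_mutex_t stop_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for try_stop_cpus(): succeeds only if the mutex is free. */
static bool fake_stop_cpus(void)
{
	if (pthread_mutex_trylock(&stop_mutex) != 0)
		return false;			/* the -EAGAIN case */
	/*
	 * With the fix, the counter is advanced while the mutex is
	 * held -- the analogue of the lowest-numbered CPU doing
	 * atomic_inc() inside synchronize_sched_expedited_cpu_stop().
	 * Before the fix, each caller incremented it after the fact
	 * and could be preempted in between, which is the race the
	 * commit message describes.
	 */
	atomic_fetch_add(&expedited_count, 1);
	pthread_mutex_unlock(&stop_mutex);
	return true;
}

/* Stand-in for synchronize_sched_expedited(). */
static void fake_sync_expedited(void)
{
	/* Snapshot two ahead: a piggybacker may return early only
	 * after seeing three increments, i.e. three full cycles.  */
	int snap = atomic_load(&expedited_count) + 2;

	while (!fake_stop_cpus()) {
		if (atomic_load(&expedited_count) - snap > 0)
			return;		/* piggyback on someone else's cycles */
		/* the real code backs off, retries, or falls back here */
	}
}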
Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--  kernel/rcutree_plugin.h | 18
1 file changed, 16 insertions(+), 2 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 643c8f650dd0..c22c4ef2a0d0 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1041,6 +1041,8 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
 	 * robustness against future implementation changes.
 	 */
 	smp_mb(); /* See above comment block. */
+	if (cpumask_first(cpu_online_mask) == smp_processor_id())
+		atomic_inc(&synchronize_sched_expedited_count);
 	return 0;
 }
 
@@ -1053,13 +1055,26 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  * Note that it is illegal to call this function while holding any
  * lock that is acquired by a CPU-hotplug notifier.  Failing to
  * observe this restriction will result in deadlock.
+ *
+ * The synchronize_sched_expedited_cpu_stop() function is called
+ * in stop-CPU context, but in order to keep overhead down to a dull
+ * roar, we don't force this function to wait for its counterparts
+ * on other CPUs.  One instance of this function will increment the
+ * synchronize_sched_expedited_count variable per call to
+ * try_stop_cpus(), but there is no guarantee what order this instance
+ * will occur in.  The worst case is that it is last on one call
+ * to try_stop_cpus(), and the first on the next call.  This means
+ * that piggybacking requires that synchronize_sched_expedited_count
+ * be incremented by 3: this guarantees that the piggybacking
+ * task has waited through an entire cycle of context switches,
+ * even in the worst case.
  */
 void synchronize_sched_expedited(void)
 {
 	int snap, trycount = 0;
 
 	smp_mb();  /* ensure prior mod happens before capturing snap. */
-	snap = atomic_read(&synchronize_sched_expedited_count) + 1;
+	snap = atomic_read(&synchronize_sched_expedited_count) + 2;
 	get_online_cpus();
 	while (try_stop_cpus(cpu_online_mask,
 			     synchronize_sched_expedited_cpu_stop,
@@ -1077,7 +1092,6 @@ void synchronize_sched_expedited(void)
 		}
 		get_online_cpus();
 	}
-	atomic_inc(&synchronize_sched_expedited_count);
 	smp_mb__after_atomic_inc();  /* ensure post-GP actions seen after GP. */
 	put_online_cpus();
 }
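For reference, the arithmetic behind the new "+ 2" snapshot: the piggyback
test elsewhere in synchronize_sched_expedited() is not part of the hunks
above, so the comparison below is an assumption about its form, used only
to show why "+ 2" corresponds to the "incremented by 3" requirement in the
new comment block.

#include <stdbool.h>

/*
 * Sketch of the piggyback arithmetic only.  "count" stands for
 * synchronize_sched_expedited_count and "snap" for the value the
 * patched code records, i.e. count-at-entry + 2.
 */
static bool piggyback_done(int count, int snap)
{
	/*
	 * True only once count >= count-at-entry + 3: three increments,
	 * hence three calls to try_stop_cpus(), covering the worst-case
	 * callback ordering described in the new comment block.
	 */
	return count - snap > 0;
}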