diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-02-05 10:42:44 -0500 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2012-04-30 13:48:19 -0400 |
commit | cef50120b61c2af4ce34bc165e19cad66296f93d (patch) | |
tree | 963a9473155bcf6a8aa12ce92ff842242c9c3575 /include | |
parent | fae4b54f28f034d228fa3bfc98858c698b64e89c (diff) |
rcu: Direct algorithmic SRCU implementation
The current implementation of synchronize_srcu_expedited() can cause
severe OS jitter due to its use of synchronize_sched(), which in turn
invokes try_stop_cpus(), which causes each CPU to be sent an IPI.
This can result in severe performance degradation for real-time workloads
and especially for short-iteration-length HPC workloads. Furthermore,
because only one instance of try_stop_cpus() can be making forward progress
at a given time, only one instance of synchronize_srcu_expedited() can
make forward progress at a time, even if they are all operating on
distinct srcu_struct structures.
This commit, inspired by an earlier implementation by Peter Zijlstra
(https://lkml.org/lkml/2012/1/31/211) and by further offline discussions,
takes a strictly algorithmic bits-in-memory approach. This has the
disadvantage of requiring one explicit memory-barrier instruction in
each of srcu_read_lock() and srcu_read_unlock(), but on the other hand
completely dispenses with OS jitter and furthermore allows SRCU to be
used freely by CPUs that RCU believes to be idle or offline.
The update-side implementation handles the single read-side memory
barrier by rechecking the per-CPU counters after summing them and
by running through the update-side state machine twice.
This implementation has passed moderate rcutorture testing on both
x86 and Power. Also updated to use this_cpu_ptr() instead of per_cpu_ptr(),
as suggested by Peter Zijlstra.
Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/srcu.h | 10 |
1 files changed, 8 insertions, 2 deletions
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index d3d5fa54f25e..a478c8eb8479 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -31,13 +31,19 @@ | |||
31 | #include <linux/rcupdate.h> | 31 | #include <linux/rcupdate.h> |
32 | 32 | ||
33 | struct srcu_struct_array { | 33 | struct srcu_struct_array { |
34 | int c[2]; | 34 | unsigned long c[2]; |
35 | }; | 35 | }; |
36 | 36 | ||
37 | /* Bit definitions for field ->c above and ->snap below. */ | ||
38 | #define SRCU_USAGE_BITS 2 | ||
39 | #define SRCU_REF_MASK (ULONG_MAX >> SRCU_USAGE_BITS) | ||
40 | #define SRCU_USAGE_COUNT (SRCU_REF_MASK + 1) | ||
41 | |||
37 | struct srcu_struct { | 42 | struct srcu_struct { |
38 | int completed; | 43 | unsigned completed; |
39 | struct srcu_struct_array __percpu *per_cpu_ref; | 44 | struct srcu_struct_array __percpu *per_cpu_ref; |
40 | struct mutex mutex; | 45 | struct mutex mutex; |
46 | unsigned long snap[NR_CPUS]; | ||
41 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
42 | struct lockdep_map dep_map; | 48 | struct lockdep_map dep_map; |
43 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 49 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |