author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2016-01-30 20:57:35 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2016-03-31 16:34:08 -0400
commit	f6a12f34a448cc8a624070fd365c29c890138a48 (patch)
tree	c7fc5c50f1bf0c5af3b6a7d5f2dc61f43a4cfca3 /kernel/rcu/tree.h
parent	d40a4f09a448382961fa9b1a2f7d4f34813f0273 (diff)
rcu: Enforce expedited-GP fairness via funnel wait queue
The current mutex-based funnel-locking approach used by expedited grace periods is subject to severe unfairness. The problem arises when a few tasks, making a path from leaves to root, all wake up before other tasks do. A new task can then follow this path all the way to the root, which needlessly delays tasks whose grace period is done, but who do not happen to acquire the lock quickly enough.

This commit avoids this problem by maintaining per-rcu_node wait queues, along with a per-rcu_node counter that tracks the latest grace period sought by an earlier task to visit this node. If that grace period would satisfy the current task, instead of proceeding up the tree, it waits on the current rcu_node structure using a pair of wait queues provided for that purpose. This decouples awakening of old tasks from the arrival of new tasks.

If the wakeups prove to be a bottleneck, additional kthreads can be brought to bear for that purpose.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
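To make the mechanism above concrete, the following is a minimal userspace sketch of a funnel wait queue. It is illustrative only, not the kernel code: pthread mutexes and condition variables stand in for the patch's exp_lock and exp_wq wait queues, the names (funnel_node, seq_rq, seq_done, funnel_lock(), funnel_complete()) are hypothetical, and plain comparisons replace the kernel's wraparound-safe ULONG_CMP_GE().

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct funnel_node {
	pthread_mutex_t lock;		/* models rnp->exp_lock */
	pthread_cond_t wq;		/* models rnp->exp_wq[], pair collapsed to one */
	unsigned long seq_rq;		/* models rnp->exp_seq_rq */
	unsigned long seq_done;		/* latest completed GP sequence number */
	struct funnel_node *parent;	/* NULL at the root */
};

/*
 * Walk from a leaf toward the root.  At each node, if an earlier task has
 * already requested a grace period that would satisfy sequence number s,
 * sleep on that node's queue instead of contending further up the tree.
 * Return true only if the caller reached the root and must drive the GP.
 */
static bool funnel_lock(struct funnel_node *node, unsigned long s)
{
	for (; node; node = node->parent) {
		pthread_mutex_lock(&node->lock);
		if (node->seq_rq >= s) {	/* kernel would use ULONG_CMP_GE() */
			/* A prior task wants this GP or a later one: wait here. */
			while (node->seq_done < s)
				pthread_cond_wait(&node->wq, &node->lock);
			pthread_mutex_unlock(&node->lock);
			return false;		/* our grace period is done */
		}
		node->seq_rq = s;		/* followers can now wait on us */
		pthread_mutex_unlock(&node->lock);
	}
	return true;		/* reached the root: caller drives the GP */
}

/* GP driver: after completing sequence s, wake the waiters on every node. */
static void funnel_complete(struct funnel_node *nodes[], size_t n,
			    unsigned long s)
{
	for (size_t i = 0; i < n; i++) {
		pthread_mutex_lock(&nodes[i]->lock);
		nodes[i]->seq_done = s;
		pthread_cond_broadcast(&nodes[i]->wq);
		pthread_mutex_unlock(&nodes[i]->lock);
	}
}

Note that the patch adds a pair of wait queues (exp_wq[2]) rather than the single condition variable used here, presumably so that waiters for adjacent grace-period sequence numbers do not share a queue.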
Diffstat (limited to 'kernel/rcu/tree.h')
-rw-r--r--	kernel/rcu/tree.h | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a8f09446924..f9d4fbb1e014 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -70,7 +70,6 @@
 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 }
 # define RCU_NODE_NAME_INIT { "rcu_node_0" }
 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0" }
 #elif NR_CPUS <= RCU_FANOUT_2
 # define RCU_NUM_LVLS 2
 # define NUM_RCU_LVL_0 1
@@ -79,7 +78,6 @@
 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1 }
 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" }
 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1" }
 #elif NR_CPUS <= RCU_FANOUT_3
 # define RCU_NUM_LVLS 3
 # define NUM_RCU_LVL_0 1
@@ -89,7 +87,6 @@
 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 }
 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" }
 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2" }
 #elif NR_CPUS <= RCU_FANOUT_4
 # define RCU_NUM_LVLS 4
 # define NUM_RCU_LVL_0 1
@@ -100,7 +97,6 @@
 # define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 }
 # define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" }
 # define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" }
-# define RCU_EXP_NAME_INIT { "rcu_node_exp_0", "rcu_node_exp_1", "rcu_node_exp_2", "rcu_node_exp_3" }
 #else
 # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
 #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
@@ -252,7 +248,9 @@ struct rcu_node {
 				/* Counts of upcoming no-CB GP requests. */
 	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 
-	struct mutex exp_funnel_mutex ____cacheline_internodealigned_in_smp;
+	spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
+	unsigned long exp_seq_rq;
+	wait_queue_head_t exp_wq[2];
 } ____cacheline_internodealigned_in_smp;
 
 /*
@@ -387,7 +385,6 @@ struct rcu_data {
 #ifdef CONFIG_RCU_FAST_NO_HZ
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
-	struct mutex exp_funnel_mutex;
 	atomic_long_t exp_workdone1;	/* # done by others #1. */
 	atomic_long_t exp_workdone2;	/* # done by others #2. */
 	atomic_long_t exp_workdone3;	/* # done by others #3. */
@@ -504,6 +501,7 @@ struct rcu_state {
 				/* _rcu_barrier(). */
 	/* End of fields guarded by barrier_mutex. */
 
+	struct mutex exp_mutex;	/* Serialize expedited GP. */
 	unsigned long expedited_sequence;	/* Take a ticket. */
 	atomic_long_t expedited_normal;	/* # fallbacks to normal. */
 	atomic_t expedited_need_qs;	/* # CPUs left to check in. */
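Building on the sketch after the commit message, a hypothetical top-level driver shows how the new rsp->exp_mutex field fits in: the one task that wins the funnel race takes exp_mutex and drives the expedited grace period, while everyone else sleeps on a per-node queue until funnel_complete() wakes them. The names and the simple monotonic counter are again illustrative; the real code snapshots the sequence counter with memory barriers and advances it at both grace-period start and end.

#include <stdatomic.h>

static pthread_mutex_t exp_mutex = PTHREAD_MUTEX_INITIALIZER;	/* models rsp->exp_mutex */
static atomic_ulong expedited_sequence;				/* models rsp->expedited_sequence */

static void synchronize_model(struct funnel_node *leaf,
			      struct funnel_node *nodes[], size_t n)
{
	unsigned long s = atomic_load(&expedited_sequence) + 1;	/* take a ticket */

	if (!funnel_lock(leaf, s))
		return;			/* a concurrent GP already covered us */

	pthread_mutex_lock(&exp_mutex);	/* serialize the actual grace period */
	if (atomic_load(&expedited_sequence) < s) {
		/* ...the real grace-period machinery would run here... */
		atomic_store(&expedited_sequence, s);
		funnel_complete(nodes, n, s);	/* wake those we did the work for */
	}
	pthread_mutex_unlock(&exp_mutex);
}

The re-check under exp_mutex matters: if a concurrent driver already completed a grace period with an equal or later sequence number, its funnel_complete() has already woken our followers, so this task must not complete an older sequence number behind it.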