author		Oleg Nesterov <oleg@redhat.com>	2015-08-21 13:42:44 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2015-10-06 14:25:04 -0400
commit		cc44ca848f5e517aeca9f5eabbe13609a3f71450
tree		4380962c131d6585f347aee962b0280176ae4bdd
parent		3836f5337f74fedc15981688c3c31dbf4293ae84
rcu: Create rcu_sync infrastructure
The rcu_sync infrastructure can be thought of as a building block for
implementing reader-writer primitives whose readers are extremely
lightweight during times when there are no writers. The first use is in
the percpu_rwsem used by the VFS subsystem.
This infrastructure is functionally equivalent to

	struct rcu_sync_struct {
		atomic_t counter;
	};

	/* Check possibility of fast-path read-side operations. */
	static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss)
	{
		return atomic_read(&rss->counter) == 0;
	}

	/* Tell readers to use slowpaths. */
	static inline void rcu_sync_enter(struct rcu_sync_struct *rss)
	{
		atomic_inc(&rss->counter);
		synchronize_sched();
	}

	/* Allow readers to once again use fastpaths. */
	static inline void rcu_sync_exit(struct rcu_sync_struct *rss)
	{
		synchronize_sched();
		atomic_dec(&rss->counter);
	}
The main difference is that it records the state and only calls
synchronize_sched() if required. At least some of the calls to
synchronize_sched() will be optimized away when rcu_sync_enter() and
rcu_sync_exit() are invoked repeatedly in quick succession.
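As a purely illustrative sketch (not part of this patch), the following
hypothetical updater shows where the grace-period wait is and is not
incurred; my_sync and my_writer are stand-in names:

	#include <linux/rcu_sync.h>

	static DEFINE_RCU_SCHED_SYNC(my_sync);	/* hypothetical instance */

	static void my_writer(void)		/* hypothetical updater */
	{
		rcu_sync_enter(&my_sync);  /* GP_IDLE: waits for a grace period */
		/* ... first update ... */
		rcu_sync_exit(&my_sync);   /* queues rcu_sync_func(), returns now */

		/*
		 * If this runs before that callback fires, gp_state is still
		 * GP_PASSED, so no grace-period wait is needed and the pending
		 * callback is simply dropped.
		 */
		rcu_sync_enter(&my_sync);
		/* ... second update ... */
		rcu_sync_exit(&my_sync);
	}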
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
 include/linux/rcu_sync.h |  94
 kernel/rcu/Makefile      |   2
 kernel/rcu/sync.c        | 175
 3 files changed, 270 insertions(+), 1 deletion(-)
diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h
new file mode 100644
index 000000000000..cb044df2e21c
--- /dev/null
+++ b/include/linux/rcu_sync.h
@@ -0,0 +1,94 @@
/*
 * RCU-based infrastructure for lightweight reader-writer locking
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (c) 2015, Red Hat, Inc.
 *
 * Author: Oleg Nesterov <oleg@redhat.com>
 */

#ifndef _LINUX_RCU_SYNC_H_
#define _LINUX_RCU_SYNC_H_

#include <linux/wait.h>
#include <linux/rcupdate.h>

/* Structure to mediate between updaters and fastpath-using readers. */
struct rcu_sync {
	int			gp_state;
	int			gp_count;
	wait_queue_head_t	gp_wait;

	int			cb_state;
	struct rcu_head		cb_head;

	void (*sync)(void);
	void (*call)(struct rcu_head *, void (*)(struct rcu_head *));
};

#define ___RCU_SYNC_INIT(name)						\
	.gp_state = 0,							\
	.gp_count = 0,							\
	.gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait),		\
	.cb_state = 0

#define __RCU_SCHED_SYNC_INIT(name) {					\
	___RCU_SYNC_INIT(name),						\
	.sync = synchronize_sched,					\
	.call = call_rcu_sched,						\
}

#define __RCU_BH_SYNC_INIT(name) {					\
	___RCU_SYNC_INIT(name),						\
	.sync = synchronize_rcu_bh,					\
	.call = call_rcu_bh,						\
}

#define __RCU_SYNC_INIT(name) {						\
	___RCU_SYNC_INIT(name),						\
	.sync = synchronize_rcu,					\
	.call = call_rcu,						\
}

#define DEFINE_RCU_SCHED_SYNC(name)	\
	struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name)

#define DEFINE_RCU_BH_SYNC(name)	\
	struct rcu_sync name = __RCU_BH_SYNC_INIT(name)

#define DEFINE_RCU_SYNC(name)		\
	struct rcu_sync name = __RCU_SYNC_INIT(name)
/**
 * rcu_sync_is_idle() - Are readers permitted to use their fastpaths?
 * @rsp: Pointer to rcu_sync structure to use for synchronization
 *
 * Returns true if readers are permitted to use their fastpaths.
 * Must be invoked within an RCU read-side critical section whose
 * flavor matches that of the rcu_sync structure.
 */
static inline bool rcu_sync_is_idle(struct rcu_sync *rsp)
{
	return !rsp->gp_state; /* GP_IDLE */
}
87 | |||
88 | enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; | ||
89 | |||
90 | extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); | ||
91 | extern void rcu_sync_enter(struct rcu_sync *); | ||
92 | extern void rcu_sync_exit(struct rcu_sync *); | ||
93 | |||
94 | #endif /* _LINUX_RCU_SYNC_H_ */ | ||
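For orientation, a hypothetical sched-flavor reader pairing with
rcu_sync_is_idle() might look like the sketch below; my_reader,
reader_fast_path(), and reader_slow_path() are illustrative stand-ins
for the enclosing primitive's two paths, not names from this patch:

	#include <linux/rcu_sync.h>

	extern void reader_fast_path(void);	/* illustrative */
	extern void reader_slow_path(void);	/* illustrative */

	static void my_reader(struct rcu_sync *rsp)
	{
		rcu_read_lock_sched();		/* matches the sched flavor */
		if (rcu_sync_is_idle(rsp)) {
			reader_fast_path();	/* no writers anywhere */
			rcu_read_unlock_sched();
			return;
		}
		rcu_read_unlock_sched();
		reader_slow_path();		/* writers active or draining */
	}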
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 50a808424b06..61a16569ffbf 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -1,4 +1,4 @@
-obj-y += update.o
+obj-y += update.o sync.o
 obj-$(CONFIG_SRCU) += srcu.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += tree.o
diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c
new file mode 100644
index 000000000000..0a11df43be23
--- /dev/null
+++ b/kernel/rcu/sync.c
@@ -0,0 +1,175 @@
/*
 * RCU-based infrastructure for lightweight reader-writer locking
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 * Copyright (c) 2015, Red Hat, Inc.
 *
 * Author: Oleg Nesterov <oleg@redhat.com>
 */

#include <linux/rcu_sync.h>
#include <linux/sched.h>

enum { GP_IDLE = 0, GP_PENDING, GP_PASSED };
enum { CB_IDLE = 0, CB_PENDING, CB_REPLAY };
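/*
 * gp_state: GP_IDLE means no updater is present and readers may use
 * their fastpaths; GP_PENDING means the initial grace period is still
 * in flight; GP_PASSED means readers are confined to their slowpaths.
 * cb_state tracks the callback queued by rcu_sync_exit(): CB_PENDING
 * while one is in flight, CB_REPLAY when a later rcu_sync_exit() means
 * the callback must be requeued for one more grace period.
 */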
28 | |||
29 | #define rss_lock gp_wait.lock | ||
30 | |||
31 | /** | ||
32 | * rcu_sync_init() - Initialize an rcu_sync structure | ||
33 | * @rsp: Pointer to rcu_sync structure to be initialized | ||
34 | * @type: Flavor of RCU with which to synchronize rcu_sync structure | ||
35 | */ | ||
36 | void rcu_sync_init(struct rcu_sync *rsp, enum rcu_sync_type type) | ||
37 | { | ||
38 | memset(rsp, 0, sizeof(*rsp)); | ||
39 | init_waitqueue_head(&rsp->gp_wait); | ||
40 | |||
41 | switch (type) { | ||
42 | case RCU_SYNC: | ||
43 | rsp->sync = synchronize_rcu; | ||
44 | rsp->call = call_rcu; | ||
45 | break; | ||
46 | |||
47 | case RCU_SCHED_SYNC: | ||
48 | rsp->sync = synchronize_sched; | ||
49 | rsp->call = call_rcu_sched; | ||
50 | break; | ||
51 | |||
52 | case RCU_BH_SYNC: | ||
53 | rsp->sync = synchronize_rcu_bh; | ||
54 | rsp->call = call_rcu_bh; | ||
55 | break; | ||
56 | } | ||
57 | } | ||
58 | |||
59 | /** | ||
60 | * rcu_sync_enter() - Force readers onto slowpath | ||
61 | * @rsp: Pointer to rcu_sync structure to use for synchronization | ||
62 | * | ||
63 | * This function is used by updaters who need readers to make use of | ||
64 | * a slowpath during the update. After this function returns, all | ||
65 | * subsequent calls to rcu_sync_is_idle() will return false, which | ||
66 | * tells readers to stay off their fastpaths. A later call to | ||
67 | * rcu_sync_exit() re-enables reader slowpaths. | ||
68 | * | ||
69 | * When called in isolation, rcu_sync_enter() must wait for a grace | ||
70 | * period, however, closely spaced calls to rcu_sync_enter() can | ||
71 | * optimize away the grace-period wait via a state machine implemented | ||
72 | * by rcu_sync_enter(), rcu_sync_exit(), and rcu_sync_func(). | ||
73 | */ | ||
74 | void rcu_sync_enter(struct rcu_sync *rsp) | ||
75 | { | ||
76 | bool need_wait, need_sync; | ||
77 | |||
78 | spin_lock_irq(&rsp->rss_lock); | ||
79 | need_wait = rsp->gp_count++; | ||
80 | need_sync = rsp->gp_state == GP_IDLE; | ||
81 | if (need_sync) | ||
82 | rsp->gp_state = GP_PENDING; | ||
83 | spin_unlock_irq(&rsp->rss_lock); | ||
84 | |||
85 | BUG_ON(need_wait && need_sync); | ||
86 | |||
87 | if (need_sync) { | ||
88 | rsp->sync(); | ||
89 | rsp->gp_state = GP_PASSED; | ||
90 | wake_up_all(&rsp->gp_wait); | ||
91 | } else if (need_wait) { | ||
92 | wait_event(rsp->gp_wait, rsp->gp_state == GP_PASSED); | ||
93 | } else { | ||
94 | /* | ||
95 | * Possible when there's a pending CB from a rcu_sync_exit(). | ||
96 | * Nobody has yet been allowed the 'fast' path and thus we can | ||
97 | * avoid doing any sync(). The callback will get 'dropped'. | ||
98 | */ | ||
99 | BUG_ON(rsp->gp_state != GP_PASSED); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | /** | ||
104 | * rcu_sync_func() - Callback function managing reader access to fastpath | ||
105 | * @rsp: Pointer to rcu_sync structure to use for synchronization | ||
106 | * | ||
107 | * This function is passed to one of the call_rcu() functions by | ||
108 | * rcu_sync_exit(), so that it is invoked after a grace period following the | ||
109 | * that invocation of rcu_sync_exit(). It takes action based on events that | ||
110 | * have taken place in the meantime, so that closely spaced rcu_sync_enter() | ||
111 | * and rcu_sync_exit() pairs need not wait for a grace period. | ||
112 | * | ||
113 | * If another rcu_sync_enter() is invoked before the grace period | ||
114 | * ended, reset state to allow the next rcu_sync_exit() to let the | ||
115 | * readers back onto their fastpaths (after a grace period). If both | ||
116 | * another rcu_sync_enter() and its matching rcu_sync_exit() are invoked | ||
117 | * before the grace period ended, re-invoke call_rcu() on behalf of that | ||
118 | * rcu_sync_exit(). Otherwise, set all state back to idle so that readers | ||
119 | * can again use their fastpaths. | ||
120 | */ | ||
121 | static void rcu_sync_func(struct rcu_head *rcu) | ||
122 | { | ||
123 | struct rcu_sync *rsp = container_of(rcu, struct rcu_sync, cb_head); | ||
124 | unsigned long flags; | ||
125 | |||
126 | BUG_ON(rsp->gp_state != GP_PASSED); | ||
127 | BUG_ON(rsp->cb_state == CB_IDLE); | ||
128 | |||
129 | spin_lock_irqsave(&rsp->rss_lock, flags); | ||
130 | if (rsp->gp_count) { | ||
131 | /* | ||
132 | * A new rcu_sync_begin() has happened; drop the callback. | ||
133 | */ | ||
134 | rsp->cb_state = CB_IDLE; | ||
135 | } else if (rsp->cb_state == CB_REPLAY) { | ||
136 | /* | ||
137 | * A new rcu_sync_exit() has happened; requeue the callback | ||
138 | * to catch a later GP. | ||
139 | */ | ||
140 | rsp->cb_state = CB_PENDING; | ||
141 | rsp->call(&rsp->cb_head, rcu_sync_func); | ||
142 | } else { | ||
143 | /* | ||
144 | * We're at least a GP after rcu_sync_exit(); eveybody will now | ||
145 | * have observed the write side critical section. Let 'em rip!. | ||
146 | */ | ||
147 | rsp->cb_state = CB_IDLE; | ||
148 | rsp->gp_state = GP_IDLE; | ||
149 | } | ||
150 | spin_unlock_irqrestore(&rsp->rss_lock, flags); | ||
151 | } | ||
152 | |||
153 | /** | ||
154 | * rcu_sync_exit() - Allow readers back onto fast patch after grace period | ||
155 | * @rsp: Pointer to rcu_sync structure to use for synchronization | ||
156 | * | ||
157 | * This function is used by updaters who have completed, and can therefore | ||
158 | * now allow readers to make use of their fastpaths after a grace period | ||
159 | * has elapsed. After this grace period has completed, all subsequent | ||
160 | * calls to rcu_sync_is_idle() will return true, which tells readers that | ||
161 | * they can once again use their fastpaths. | ||
162 | */ | ||
163 | void rcu_sync_exit(struct rcu_sync *rsp) | ||
164 | { | ||
165 | spin_lock_irq(&rsp->rss_lock); | ||
166 | if (!--rsp->gp_count) { | ||
167 | if (rsp->cb_state == CB_IDLE) { | ||
168 | rsp->cb_state = CB_PENDING; | ||
169 | rsp->call(&rsp->cb_head, rcu_sync_func); | ||
170 | } else if (rsp->cb_state == CB_PENDING) { | ||
171 | rsp->cb_state = CB_REPLAY; | ||
172 | } | ||
173 | } | ||
174 | spin_unlock_irq(&rsp->rss_lock); | ||
175 | } | ||
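
To close, a minimal sketch of how a client might tie these pieces
together, assuming a hypothetical my_object wrapper (the percpu_rwsem
conversion itself lands in a later patch):

	#include <linux/rcu_sync.h>

	struct my_object {		/* hypothetical client structure */
		struct rcu_sync rss;
		/* ... state guarded by the reader-writer primitive ... */
	};

	static void my_object_init(struct my_object *obj)
	{
		rcu_sync_init(&obj->rss, RCU_SCHED_SYNC);
	}

	static void my_object_update(struct my_object *obj)
	{
		rcu_sync_enter(&obj->rss);  /* force readers onto slowpaths */
		/* ... carry out the update ... */
		rcu_sync_exit(&obj->rss);   /* fastpaths resume after a GP */
	}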