diff options
author | Eric Paris <eparis@redhat.com> | 2010-07-28 10:18:38 -0400 |
---|---|---|
committer | Eric Paris <eparis@redhat.com> | 2010-07-28 10:18:52 -0400 |
commit | 75c1be487a690db43da2c1234fcacd84c982803c (patch) | |
tree | b38ce47f157d3b0eff7ac6eb4756a4b390ac35ae | |
parent | 700307a29ad61090dcf1d45f8f4a135f5e9211ae (diff) |
fsnotify: srcu to protect read side of inode and vfsmount locks
Currently, the inode->i_fsnotify_marks and
vfsmount->mnt_fsnotify_marks lists are protected by a spinlock on both the
read and the write side. This patch protects the read side of those lists
with a single new SRCU instead.
Signed-off-by: Eric Paris <eparis@redhat.com>
-rw-r--r-- | fs/notify/fsnotify.c | 69 | ||||
-rw-r--r-- | fs/notify/fsnotify.h | 5 | ||||
-rw-r--r-- | fs/notify/group.c | 16 | ||||
-rw-r--r-- | fs/notify/mark.c | 60 | ||||
-rw-r--r-- | include/linux/fsnotify_backend.h | 1 |
5 files changed, 111 insertions, 40 deletions
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 4788c866473a..4678b416241e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -144,14 +144,15 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is) | |||
144 | { | 144 | { |
145 | struct fsnotify_mark *mark; | 145 | struct fsnotify_mark *mark; |
146 | struct hlist_node *node; | 146 | struct hlist_node *node; |
147 | int idx; | ||
148 | |||
149 | idx = srcu_read_lock(&fsnotify_mark_srcu); | ||
147 | 150 | ||
148 | if (!hlist_empty(&inode->i_fsnotify_marks)) { | 151 | if (!hlist_empty(&inode->i_fsnotify_marks)) { |
149 | spin_lock(&inode->i_lock); | 152 | hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) { |
150 | hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) { | ||
151 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) | 153 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) |
152 | mark->ignored_mask = 0; | 154 | mark->ignored_mask = 0; |
153 | } | 155 | } |
154 | spin_unlock(&inode->i_lock); | ||
155 | } | 156 | } |
156 | 157 | ||
157 | if (data_is == FSNOTIFY_EVENT_FILE) { | 158 | if (data_is == FSNOTIFY_EVENT_FILE) { |
@@ -159,14 +160,14 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is) | |||
159 | 160 | ||
160 | mnt = ((struct file *)data)->f_path.mnt; | 161 | mnt = ((struct file *)data)->f_path.mnt; |
161 | if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) { | 162 | if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) { |
162 | spin_lock(&mnt->mnt_root->d_lock); | 163 | hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { |
163 | hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { | ||
164 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) | 164 | if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) |
165 | mark->ignored_mask = 0; | 165 | mark->ignored_mask = 0; |
166 | } | 166 | } |
167 | spin_unlock(&mnt->mnt_root->d_lock); | ||
168 | } | 167 | } |
169 | } | 168 | } |
169 | |||
170 | srcu_read_unlock(&fsnotify_mark_srcu, idx); | ||
170 | } | 171 | } |
171 | 172 | ||
172 | static int send_to_group(struct fsnotify_group *group, struct inode *to_tell, | 173 | static int send_to_group(struct fsnotify_group *group, struct inode *to_tell, |
@@ -208,8 +209,10 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt) | |||
208 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | 209 | int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, |
209 | const unsigned char *file_name, u32 cookie) | 210 | const unsigned char *file_name, u32 cookie) |
210 | { | 211 | { |
212 | struct fsnotify_mark *mark; | ||
211 | struct fsnotify_group *group; | 213 | struct fsnotify_group *group; |
212 | struct fsnotify_event *event = NULL; | 214 | struct fsnotify_event *event = NULL; |
215 | struct hlist_node *node; | ||
213 | struct vfsmount *mnt = NULL; | 216 | struct vfsmount *mnt = NULL; |
214 | int idx, ret = 0; | 217 | int idx, ret = 0; |
215 | /* global tests shouldn't care about events on child only the specific event */ | 218 | /* global tests shouldn't care about events on child only the specific event */ |
@@ -237,35 +240,47 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, | |||
237 | !needed_by_vfsmount(test_mask, mnt)) | 240 | !needed_by_vfsmount(test_mask, mnt)) |
238 | return 0; | 241 | return 0; |
239 | 242 | ||
240 | /* | 243 | idx = srcu_read_lock(&fsnotify_mark_srcu); |
241 | * SRCU!! the groups list is very very much read only and the path is | ||
242 | * very hot. The VAST majority of events are not going to need to do | ||
243 | * anything other than walk the list so it's crazy to pre-allocate. | ||
244 | */ | ||
245 | idx = srcu_read_lock(&fsnotify_grp_srcu); | ||
246 | 244 | ||
247 | if (test_mask & to_tell->i_fsnotify_mask) { | 245 | if (test_mask & to_tell->i_fsnotify_mask) { |
248 | list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) { | 246 | hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) { |
249 | if (test_mask & group->mask) { | 247 | |
250 | ret = send_to_group(group, to_tell, NULL, mask, data, data_is, | 248 | pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n", |
251 | cookie, file_name, &event); | 249 | __func__, mark, mark->mask, mark->ignored_mask); |
250 | |||
251 | if (test_mask & mark->mask & ~mark->ignored_mask) { | ||
252 | group = mark->group; | ||
253 | if (!group) | ||
254 | continue; | ||
255 | ret = send_to_group(group, to_tell, NULL, mask, | ||
256 | data, data_is, cookie, file_name, | ||
257 | &event); | ||
252 | if (ret) | 258 | if (ret) |
253 | goto out; | 259 | goto out; |
254 | } | 260 | } |
255 | } | 261 | } |
256 | } | 262 | } |
257 | if (needed_by_vfsmount(test_mask, mnt)) { | 263 | |
258 | list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) { | 264 | if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) { |
259 | if (test_mask & group->mask) { | 265 | hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) { |
260 | ret = send_to_group(group, to_tell, mnt, mask, data, data_is, | 266 | |
261 | cookie, file_name, &event); | 267 | pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n", |
268 | __func__, mark, mark->mask, mark->ignored_mask); | ||
269 | |||
270 | if (test_mask & mark->mask & ~mark->ignored_mask) { | ||
271 | group = mark->group; | ||
272 | if (!group) | ||
273 | continue; | ||
274 | ret = send_to_group(group, to_tell, mnt, mask, | ||
275 | data, data_is, cookie, file_name, | ||
276 | &event); | ||
262 | if (ret) | 277 | if (ret) |
263 | goto out; | 278 | goto out; |
264 | } | 279 | } |
265 | } | 280 | } |
266 | } | 281 | } |
267 | out: | 282 | out: |
268 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | 283 | srcu_read_unlock(&fsnotify_mark_srcu, idx); |
269 | /* | 284 | /* |
270 | * fsnotify_create_event() took a reference so the event can't be cleaned | 285 | * fsnotify_create_event() took a reference so the event can't be cleaned |
271 | * up while we are still trying to add it to lists, drop that one. | 286 | * up while we are still trying to add it to lists, drop that one. |
@@ -279,8 +294,14 @@ EXPORT_SYMBOL_GPL(fsnotify); | |||
279 | 294 | ||
280 | static __init int fsnotify_init(void) | 295 | static __init int fsnotify_init(void) |
281 | { | 296 | { |
297 | int ret; | ||
298 | |||
282 | BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); | 299 | BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); |
283 | 300 | ||
284 | return init_srcu_struct(&fsnotify_grp_srcu); | 301 | ret = init_srcu_struct(&fsnotify_mark_srcu); |
302 | if (ret) | ||
303 | panic("initializing fsnotify_mark_srcu"); | ||
304 | |||
305 | return 0; | ||
285 | } | 306 | } |
286 | subsys_initcall(fsnotify_init); | 307 | core_initcall(fsnotify_init); |
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 1be54f6f9e7d..7eed86f942ba 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h | |||
@@ -6,8 +6,6 @@ | |||
6 | #include <linux/srcu.h> | 6 | #include <linux/srcu.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | /* protects reads of fsnotify_groups */ | ||
10 | extern struct srcu_struct fsnotify_grp_srcu; | ||
11 | /* all groups which receive inode fsnotify events */ | 9 | /* all groups which receive inode fsnotify events */ |
12 | extern struct list_head fsnotify_inode_groups; | 10 | extern struct list_head fsnotify_inode_groups; |
13 | /* all groups which receive vfsmount fsnotify events */ | 11 | /* all groups which receive vfsmount fsnotify events */ |
@@ -20,6 +18,9 @@ extern __u32 fsnotify_vfsmount_mask; | |||
20 | /* destroy all events sitting in this groups notification queue */ | 18 | /* destroy all events sitting in this groups notification queue */ |
21 | extern void fsnotify_flush_notify(struct fsnotify_group *group); | 19 | extern void fsnotify_flush_notify(struct fsnotify_group *group); |
22 | 20 | ||
21 | /* protects reads of inode and vfsmount marks list */ | ||
22 | extern struct srcu_struct fsnotify_mark_srcu; | ||
23 | |||
23 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, | 24 | extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, |
24 | __u32 mask); | 25 | __u32 mask); |
25 | /* add a mark to an inode */ | 26 | /* add a mark to an inode */ |
diff --git a/fs/notify/group.c b/fs/notify/group.c index 7ac65ed4735b..48d3a6d6e47a 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c | |||
@@ -30,8 +30,6 @@ | |||
30 | 30 | ||
31 | /* protects writes to fsnotify_groups and fsnotify_mask */ | 31 | /* protects writes to fsnotify_groups and fsnotify_mask */ |
32 | static DEFINE_MUTEX(fsnotify_grp_mutex); | 32 | static DEFINE_MUTEX(fsnotify_grp_mutex); |
33 | /* protects reads while running the fsnotify_groups list */ | ||
34 | struct srcu_struct fsnotify_grp_srcu; | ||
35 | /* all groups registered to receive inode filesystem notifications */ | 33 | /* all groups registered to receive inode filesystem notifications */ |
36 | LIST_HEAD(fsnotify_inode_groups); | 34 | LIST_HEAD(fsnotify_inode_groups); |
37 | /* all groups registered to receive mount point filesystem notifications */ | 35 | /* all groups registered to receive mount point filesystem notifications */ |
@@ -50,18 +48,17 @@ void fsnotify_recalc_global_mask(void) | |||
50 | struct fsnotify_group *group; | 48 | struct fsnotify_group *group; |
51 | __u32 inode_mask = 0; | 49 | __u32 inode_mask = 0; |
52 | __u32 vfsmount_mask = 0; | 50 | __u32 vfsmount_mask = 0; |
53 | int idx; | ||
54 | 51 | ||
55 | idx = srcu_read_lock(&fsnotify_grp_srcu); | 52 | mutex_lock(&fsnotify_grp_mutex); |
56 | list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) | 53 | list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) |
57 | inode_mask |= group->mask; | 54 | inode_mask |= group->mask; |
58 | list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) | 55 | list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) |
59 | vfsmount_mask |= group->mask; | 56 | vfsmount_mask |= group->mask; |
60 | |||
61 | srcu_read_unlock(&fsnotify_grp_srcu, idx); | ||
62 | 57 | ||
63 | fsnotify_inode_mask = inode_mask; | 58 | fsnotify_inode_mask = inode_mask; |
64 | fsnotify_vfsmount_mask = vfsmount_mask; | 59 | fsnotify_vfsmount_mask = vfsmount_mask; |
60 | |||
61 | mutex_unlock(&fsnotify_grp_mutex); | ||
65 | } | 62 | } |
66 | 63 | ||
67 | /* | 64 | /* |
@@ -168,6 +165,8 @@ static void fsnotify_destroy_group(struct fsnotify_group *group) | |||
168 | /* clear all inode marks for this group */ | 165 | /* clear all inode marks for this group */ |
169 | fsnotify_clear_marks_by_group(group); | 166 | fsnotify_clear_marks_by_group(group); |
170 | 167 | ||
168 | synchronize_srcu(&fsnotify_mark_srcu); | ||
169 | |||
171 | /* past the point of no return, matches the initial value of 1 */ | 170 | /* past the point of no return, matches the initial value of 1 */ |
172 | if (atomic_dec_and_test(&group->num_marks)) | 171 | if (atomic_dec_and_test(&group->num_marks)) |
173 | fsnotify_final_destroy_group(group); | 172 | fsnotify_final_destroy_group(group); |
@@ -216,12 +215,7 @@ void fsnotify_put_group(struct fsnotify_group *group) | |||
216 | */ | 215 | */ |
217 | __fsnotify_evict_group(group); | 216 | __fsnotify_evict_group(group); |
218 | 217 | ||
219 | /* | ||
220 | * now it's off the list, so the only thing we might care about is | ||
221 | * srcu access.... | ||
222 | */ | ||
223 | mutex_unlock(&fsnotify_grp_mutex); | 218 | mutex_unlock(&fsnotify_grp_mutex); |
224 | synchronize_srcu(&fsnotify_grp_srcu); | ||
225 | 219 | ||
226 | /* and now it is really dead. _Nothing_ could be seeing it */ | 220 | /* and now it is really dead. _Nothing_ could be seeing it */ |
227 | fsnotify_recalc_global_mask(); | 221 | fsnotify_recalc_global_mask(); |
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 69c5a166930c..41f3990f900b 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c | |||
@@ -85,10 +85,12 @@ | |||
85 | #include <linux/fs.h> | 85 | #include <linux/fs.h> |
86 | #include <linux/init.h> | 86 | #include <linux/init.h> |
87 | #include <linux/kernel.h> | 87 | #include <linux/kernel.h> |
88 | #include <linux/kthread.h> | ||
88 | #include <linux/module.h> | 89 | #include <linux/module.h> |
89 | #include <linux/mutex.h> | 90 | #include <linux/mutex.h> |
90 | #include <linux/slab.h> | 91 | #include <linux/slab.h> |
91 | #include <linux/spinlock.h> | 92 | #include <linux/spinlock.h> |
93 | #include <linux/srcu.h> | ||
92 | #include <linux/writeback.h> /* for inode_lock */ | 94 | #include <linux/writeback.h> /* for inode_lock */ |
93 | 95 | ||
94 | #include <asm/atomic.h> | 96 | #include <asm/atomic.h> |
@@ -96,6 +98,11 @@ | |||
96 | #include <linux/fsnotify_backend.h> | 98 | #include <linux/fsnotify_backend.h> |
97 | #include "fsnotify.h" | 99 | #include "fsnotify.h" |
98 | 100 | ||
101 | struct srcu_struct fsnotify_mark_srcu; | ||
102 | static DEFINE_SPINLOCK(destroy_lock); | ||
103 | static LIST_HEAD(destroy_list); | ||
104 | static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq); | ||
105 | |||
99 | void fsnotify_get_mark(struct fsnotify_mark *mark) | 106 | void fsnotify_get_mark(struct fsnotify_mark *mark) |
100 | { | 107 | { |
101 | atomic_inc(&mark->refcnt); | 108 | atomic_inc(&mark->refcnt); |
@@ -144,11 +151,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark) | |||
144 | 151 | ||
145 | list_del_init(&mark->g_list); | 152 | list_del_init(&mark->g_list); |
146 | 153 | ||
147 | fsnotify_put_mark(mark); /* for i_list and g_list */ | ||
148 | |||
149 | spin_unlock(&group->mark_lock); | 154 | spin_unlock(&group->mark_lock); |
150 | spin_unlock(&mark->lock); | 155 | spin_unlock(&mark->lock); |
151 | 156 | ||
157 | spin_lock(&destroy_lock); | ||
158 | list_add(&mark->destroy_list, &destroy_list); | ||
159 | spin_unlock(&destroy_lock); | ||
160 | wake_up(&destroy_waitq); | ||
161 | |||
152 | /* | 162 | /* |
153 | * Some groups like to know that marks are being freed. This is a | 163 | * Some groups like to know that marks are being freed. This is a |
154 | * callback to the group function to let it know that this mark | 164 | * callback to the group function to let it know that this mark |
@@ -263,12 +273,17 @@ int fsnotify_add_mark(struct fsnotify_mark *mark, | |||
263 | err: | 273 | err: |
264 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; | 274 | mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; |
265 | list_del_init(&mark->g_list); | 275 | list_del_init(&mark->g_list); |
276 | mark->group = NULL; | ||
266 | atomic_dec(&group->num_marks); | 277 | atomic_dec(&group->num_marks); |
267 | fsnotify_put_mark(mark); | ||
268 | 278 | ||
269 | spin_unlock(&group->mark_lock); | 279 | spin_unlock(&group->mark_lock); |
270 | spin_unlock(&mark->lock); | 280 | spin_unlock(&mark->lock); |
271 | 281 | ||
282 | spin_lock(&destroy_lock); | ||
283 | list_add(&mark->destroy_list, &destroy_list); | ||
284 | spin_unlock(&destroy_lock); | ||
285 | wake_up(&destroy_waitq); | ||
286 | |||
272 | return ret; | 287 | return ret; |
273 | } | 288 | } |
274 | 289 | ||
@@ -326,3 +341,42 @@ void fsnotify_init_mark(struct fsnotify_mark *mark, | |||
326 | atomic_set(&mark->refcnt, 1); | 341 | atomic_set(&mark->refcnt, 1); |
327 | mark->free_mark = free_mark; | 342 | mark->free_mark = free_mark; |
328 | } | 343 | } |
344 | |||
345 | static int fsnotify_mark_destroy(void *ignored) | ||
346 | { | ||
347 | struct fsnotify_mark *mark, *next; | ||
348 | LIST_HEAD(private_destroy_list); | ||
349 | |||
350 | for (;;) { | ||
351 | spin_lock(&destroy_lock); | ||
352 | list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) { | ||
353 | list_del(&mark->destroy_list); | ||
354 | list_add(&mark->destroy_list, &private_destroy_list); | ||
355 | } | ||
356 | spin_unlock(&destroy_lock); | ||
357 | |||
358 | synchronize_srcu(&fsnotify_mark_srcu); | ||
359 | |||
360 | list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) { | ||
361 | list_del_init(&mark->destroy_list); | ||
362 | fsnotify_put_mark(mark); | ||
363 | } | ||
364 | |||
365 | wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list)); | ||
366 | } | ||
367 | |||
368 | return 0; | ||
369 | } | ||
370 | |||
371 | static int __init fsnotify_mark_init(void) | ||
372 | { | ||
373 | struct task_struct *thread; | ||
374 | |||
375 | thread = kthread_run(fsnotify_mark_destroy, NULL, | ||
376 | "fsnotify_mark"); | ||
377 | if (IS_ERR(thread)) | ||
378 | panic("unable to start fsnotify mark destruction thread."); | ||
379 | |||
380 | return 0; | ||
381 | } | ||
382 | device_initcall(fsnotify_mark_init); | ||
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8e24cdf72928..84159390969f 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h | |||
@@ -302,6 +302,7 @@ struct fsnotify_mark { | |||
302 | #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 | 302 | #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 |
303 | #define FSNOTIFY_MARK_FLAG_ALIVE 0x10 | 303 | #define FSNOTIFY_MARK_FLAG_ALIVE 0x10 |
304 | unsigned int flags; /* vfsmount or inode mark? */ | 304 | unsigned int flags; /* vfsmount or inode mark? */ |
305 | struct list_head destroy_list; | ||
305 | void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ | 306 | void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ |
306 | }; | 307 | }; |
307 | 308 | ||