about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Eric Paris <eparis@redhat.com> 2010-07-28 10:18:38 -0400
committer: Eric Paris <eparis@redhat.com> 2010-07-28 10:18:52 -0400
commit: 75c1be487a690db43da2c1234fcacd84c982803c (patch)
tree: b38ce47f157d3b0eff7ac6eb4756a4b390ac35ae
parent: 700307a29ad61090dcf1d45f8f4a135f5e9211ae (diff)
fsnotify: srcu to protect read side of inode and vfsmount locks
Currently, both the read side and the write side of the inode->i_fsnotify_marks and vfsmount->mnt_fsnotify_marks lists are protected by a spinlock. This patch protects the read side of those lists with a single new srcu instead. Signed-off-by: Eric Paris <eparis@redhat.com>
-rw-r--r-- fs/notify/fsnotify.c 69
-rw-r--r-- fs/notify/fsnotify.h 5
-rw-r--r-- fs/notify/group.c 16
-rw-r--r-- fs/notify/mark.c 60
-rw-r--r-- include/linux/fsnotify_backend.h 1
5 files changed, 111 insertions, 40 deletions
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4788c866473a..4678b416241e 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -144,14 +144,15 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
144{ 144{
145 struct fsnotify_mark *mark; 145 struct fsnotify_mark *mark;
146 struct hlist_node *node; 146 struct hlist_node *node;
147 int idx;
148
149 idx = srcu_read_lock(&fsnotify_mark_srcu);
147 150
148 if (!hlist_empty(&inode->i_fsnotify_marks)) { 151 if (!hlist_empty(&inode->i_fsnotify_marks)) {
149 spin_lock(&inode->i_lock); 152 hlist_for_each_entry_rcu(mark, node, &inode->i_fsnotify_marks, i.i_list) {
150 hlist_for_each_entry(mark, node, &inode->i_fsnotify_marks, i.i_list) {
151 if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) 153 if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
152 mark->ignored_mask = 0; 154 mark->ignored_mask = 0;
153 } 155 }
154 spin_unlock(&inode->i_lock);
155 } 156 }
156 157
157 if (data_is == FSNOTIFY_EVENT_FILE) { 158 if (data_is == FSNOTIFY_EVENT_FILE) {
@@ -159,14 +160,14 @@ void __fsnotify_flush_ignored_mask(struct inode *inode, void *data, int data_is)
159 160
160 mnt = ((struct file *)data)->f_path.mnt; 161 mnt = ((struct file *)data)->f_path.mnt;
161 if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) { 162 if (mnt && !hlist_empty(&mnt->mnt_fsnotify_marks)) {
162 spin_lock(&mnt->mnt_root->d_lock); 163 hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
163 hlist_for_each_entry(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
164 if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY)) 164 if (!(mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY))
165 mark->ignored_mask = 0; 165 mark->ignored_mask = 0;
166 } 166 }
167 spin_unlock(&mnt->mnt_root->d_lock);
168 } 167 }
169 } 168 }
169
170 srcu_read_unlock(&fsnotify_mark_srcu, idx);
170} 171}
171 172
172static int send_to_group(struct fsnotify_group *group, struct inode *to_tell, 173static int send_to_group(struct fsnotify_group *group, struct inode *to_tell,
@@ -208,8 +209,10 @@ static bool needed_by_vfsmount(__u32 test_mask, struct vfsmount *mnt)
208int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, 209int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
209 const unsigned char *file_name, u32 cookie) 210 const unsigned char *file_name, u32 cookie)
210{ 211{
212 struct fsnotify_mark *mark;
211 struct fsnotify_group *group; 213 struct fsnotify_group *group;
212 struct fsnotify_event *event = NULL; 214 struct fsnotify_event *event = NULL;
215 struct hlist_node *node;
213 struct vfsmount *mnt = NULL; 216 struct vfsmount *mnt = NULL;
214 int idx, ret = 0; 217 int idx, ret = 0;
215 /* global tests shouldn't care about events on child only the specific event */ 218 /* global tests shouldn't care about events on child only the specific event */
@@ -237,35 +240,47 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
237 !needed_by_vfsmount(test_mask, mnt)) 240 !needed_by_vfsmount(test_mask, mnt))
238 return 0; 241 return 0;
239 242
240 /* 243 idx = srcu_read_lock(&fsnotify_mark_srcu);
241 * SRCU!! the groups list is very very much read only and the path is
242 * very hot. The VAST majority of events are not going to need to do
243 * anything other than walk the list so it's crazy to pre-allocate.
244 */
245 idx = srcu_read_lock(&fsnotify_grp_srcu);
246 244
247 if (test_mask & to_tell->i_fsnotify_mask) { 245 if (test_mask & to_tell->i_fsnotify_mask) {
248 list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) { 246 hlist_for_each_entry_rcu(mark, node, &to_tell->i_fsnotify_marks, i.i_list) {
249 if (test_mask & group->mask) { 247
250 ret = send_to_group(group, to_tell, NULL, mask, data, data_is, 248 pr_debug("%s: inode_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
251 cookie, file_name, &event); 249 __func__, mark, mark->mask, mark->ignored_mask);
250
251 if (test_mask & mark->mask & ~mark->ignored_mask) {
252 group = mark->group;
253 if (!group)
254 continue;
255 ret = send_to_group(group, to_tell, NULL, mask,
256 data, data_is, cookie, file_name,
257 &event);
252 if (ret) 258 if (ret)
253 goto out; 259 goto out;
254 } 260 }
255 } 261 }
256 } 262 }
257 if (needed_by_vfsmount(test_mask, mnt)) { 263
258 list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) { 264 if (mnt && (test_mask & mnt->mnt_fsnotify_mask)) {
259 if (test_mask & group->mask) { 265 hlist_for_each_entry_rcu(mark, node, &mnt->mnt_fsnotify_marks, m.m_list) {
260 ret = send_to_group(group, to_tell, mnt, mask, data, data_is, 266
261 cookie, file_name, &event); 267 pr_debug("%s: mnt_loop: mark=%p mark->mask=%x mark->ignored_mask=%x\n",
268 __func__, mark, mark->mask, mark->ignored_mask);
269
270 if (test_mask & mark->mask & ~mark->ignored_mask) {
271 group = mark->group;
272 if (!group)
273 continue;
274 ret = send_to_group(group, to_tell, mnt, mask,
275 data, data_is, cookie, file_name,
276 &event);
262 if (ret) 277 if (ret)
263 goto out; 278 goto out;
264 } 279 }
265 } 280 }
266 } 281 }
267out: 282out:
268 srcu_read_unlock(&fsnotify_grp_srcu, idx); 283 srcu_read_unlock(&fsnotify_mark_srcu, idx);
269 /* 284 /*
270 * fsnotify_create_event() took a reference so the event can't be cleaned 285 * fsnotify_create_event() took a reference so the event can't be cleaned
271 * up while we are still trying to add it to lists, drop that one. 286 * up while we are still trying to add it to lists, drop that one.
@@ -279,8 +294,14 @@ EXPORT_SYMBOL_GPL(fsnotify);
279 294
280static __init int fsnotify_init(void) 295static __init int fsnotify_init(void)
281{ 296{
297 int ret;
298
282 BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23); 299 BUG_ON(hweight32(ALL_FSNOTIFY_EVENTS) != 23);
283 300
284 return init_srcu_struct(&fsnotify_grp_srcu); 301 ret = init_srcu_struct(&fsnotify_mark_srcu);
302 if (ret)
303 panic("initializing fsnotify_mark_srcu");
304
305 return 0;
285} 306}
286subsys_initcall(fsnotify_init); 307core_initcall(fsnotify_init);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 1be54f6f9e7d..7eed86f942ba 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -6,8 +6,6 @@
6#include <linux/srcu.h> 6#include <linux/srcu.h>
7#include <linux/types.h> 7#include <linux/types.h>
8 8
9/* protects reads of fsnotify_groups */
10extern struct srcu_struct fsnotify_grp_srcu;
11/* all groups which receive inode fsnotify events */ 9/* all groups which receive inode fsnotify events */
12extern struct list_head fsnotify_inode_groups; 10extern struct list_head fsnotify_inode_groups;
13/* all groups which receive vfsmount fsnotify events */ 11/* all groups which receive vfsmount fsnotify events */
@@ -20,6 +18,9 @@ extern __u32 fsnotify_vfsmount_mask;
20/* destroy all events sitting in this groups notification queue */ 18/* destroy all events sitting in this groups notification queue */
21extern void fsnotify_flush_notify(struct fsnotify_group *group); 19extern void fsnotify_flush_notify(struct fsnotify_group *group);
22 20
21/* protects reads of inode and vfsmount marks list */
22extern struct srcu_struct fsnotify_mark_srcu;
23
23extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark, 24extern void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *fsn_mark,
24 __u32 mask); 25 __u32 mask);
25/* add a mark to an inode */ 26/* add a mark to an inode */
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 7ac65ed4735b..48d3a6d6e47a 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -30,8 +30,6 @@
30 30
31/* protects writes to fsnotify_groups and fsnotify_mask */ 31/* protects writes to fsnotify_groups and fsnotify_mask */
32static DEFINE_MUTEX(fsnotify_grp_mutex); 32static DEFINE_MUTEX(fsnotify_grp_mutex);
33/* protects reads while running the fsnotify_groups list */
34struct srcu_struct fsnotify_grp_srcu;
35/* all groups registered to receive inode filesystem notifications */ 33/* all groups registered to receive inode filesystem notifications */
36LIST_HEAD(fsnotify_inode_groups); 34LIST_HEAD(fsnotify_inode_groups);
37/* all groups registered to receive mount point filesystem notifications */ 35/* all groups registered to receive mount point filesystem notifications */
@@ -50,18 +48,17 @@ void fsnotify_recalc_global_mask(void)
50 struct fsnotify_group *group; 48 struct fsnotify_group *group;
51 __u32 inode_mask = 0; 49 __u32 inode_mask = 0;
52 __u32 vfsmount_mask = 0; 50 __u32 vfsmount_mask = 0;
53 int idx;
54 51
55 idx = srcu_read_lock(&fsnotify_grp_srcu); 52 mutex_lock(&fsnotify_grp_mutex);
56 list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list) 53 list_for_each_entry_rcu(group, &fsnotify_inode_groups, inode_group_list)
57 inode_mask |= group->mask; 54 inode_mask |= group->mask;
58 list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list) 55 list_for_each_entry_rcu(group, &fsnotify_vfsmount_groups, vfsmount_group_list)
59 vfsmount_mask |= group->mask; 56 vfsmount_mask |= group->mask;
60
61 srcu_read_unlock(&fsnotify_grp_srcu, idx);
62 57
63 fsnotify_inode_mask = inode_mask; 58 fsnotify_inode_mask = inode_mask;
64 fsnotify_vfsmount_mask = vfsmount_mask; 59 fsnotify_vfsmount_mask = vfsmount_mask;
60
61 mutex_unlock(&fsnotify_grp_mutex);
65} 62}
66 63
67/* 64/*
@@ -168,6 +165,8 @@ static void fsnotify_destroy_group(struct fsnotify_group *group)
168 /* clear all inode marks for this group */ 165 /* clear all inode marks for this group */
169 fsnotify_clear_marks_by_group(group); 166 fsnotify_clear_marks_by_group(group);
170 167
168 synchronize_srcu(&fsnotify_mark_srcu);
169
171 /* past the point of no return, matches the initial value of 1 */ 170 /* past the point of no return, matches the initial value of 1 */
172 if (atomic_dec_and_test(&group->num_marks)) 171 if (atomic_dec_and_test(&group->num_marks))
173 fsnotify_final_destroy_group(group); 172 fsnotify_final_destroy_group(group);
@@ -216,12 +215,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
216 */ 215 */
217 __fsnotify_evict_group(group); 216 __fsnotify_evict_group(group);
218 217
219 /*
220 * now it's off the list, so the only thing we might care about is
221 * srcu access....
222 */
223 mutex_unlock(&fsnotify_grp_mutex); 218 mutex_unlock(&fsnotify_grp_mutex);
224 synchronize_srcu(&fsnotify_grp_srcu);
225 219
226 /* and now it is really dead. _Nothing_ could be seeing it */ 220 /* and now it is really dead. _Nothing_ could be seeing it */
227 fsnotify_recalc_global_mask(); 221 fsnotify_recalc_global_mask();
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 69c5a166930c..41f3990f900b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -85,10 +85,12 @@
85#include <linux/fs.h> 85#include <linux/fs.h>
86#include <linux/init.h> 86#include <linux/init.h>
87#include <linux/kernel.h> 87#include <linux/kernel.h>
88#include <linux/kthread.h>
88#include <linux/module.h> 89#include <linux/module.h>
89#include <linux/mutex.h> 90#include <linux/mutex.h>
90#include <linux/slab.h> 91#include <linux/slab.h>
91#include <linux/spinlock.h> 92#include <linux/spinlock.h>
93#include <linux/srcu.h>
92#include <linux/writeback.h> /* for inode_lock */ 94#include <linux/writeback.h> /* for inode_lock */
93 95
94#include <asm/atomic.h> 96#include <asm/atomic.h>
@@ -96,6 +98,11 @@
96#include <linux/fsnotify_backend.h> 98#include <linux/fsnotify_backend.h>
97#include "fsnotify.h" 99#include "fsnotify.h"
98 100
101struct srcu_struct fsnotify_mark_srcu;
102static DEFINE_SPINLOCK(destroy_lock);
103static LIST_HEAD(destroy_list);
104static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
105
99void fsnotify_get_mark(struct fsnotify_mark *mark) 106void fsnotify_get_mark(struct fsnotify_mark *mark)
100{ 107{
101 atomic_inc(&mark->refcnt); 108 atomic_inc(&mark->refcnt);
@@ -144,11 +151,14 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
144 151
145 list_del_init(&mark->g_list); 152 list_del_init(&mark->g_list);
146 153
147 fsnotify_put_mark(mark); /* for i_list and g_list */
148
149 spin_unlock(&group->mark_lock); 154 spin_unlock(&group->mark_lock);
150 spin_unlock(&mark->lock); 155 spin_unlock(&mark->lock);
151 156
157 spin_lock(&destroy_lock);
158 list_add(&mark->destroy_list, &destroy_list);
159 spin_unlock(&destroy_lock);
160 wake_up(&destroy_waitq);
161
152 /* 162 /*
153 * Some groups like to know that marks are being freed. This is a 163 * Some groups like to know that marks are being freed. This is a
154 * callback to the group function to let it know that this mark 164 * callback to the group function to let it know that this mark
@@ -263,12 +273,17 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
263err: 273err:
264 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE; 274 mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
265 list_del_init(&mark->g_list); 275 list_del_init(&mark->g_list);
276 mark->group = NULL;
266 atomic_dec(&group->num_marks); 277 atomic_dec(&group->num_marks);
267 fsnotify_put_mark(mark);
268 278
269 spin_unlock(&group->mark_lock); 279 spin_unlock(&group->mark_lock);
270 spin_unlock(&mark->lock); 280 spin_unlock(&mark->lock);
271 281
282 spin_lock(&destroy_lock);
283 list_add(&mark->destroy_list, &destroy_list);
284 spin_unlock(&destroy_lock);
285 wake_up(&destroy_waitq);
286
272 return ret; 287 return ret;
273} 288}
274 289
@@ -326,3 +341,42 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
326 atomic_set(&mark->refcnt, 1); 341 atomic_set(&mark->refcnt, 1);
327 mark->free_mark = free_mark; 342 mark->free_mark = free_mark;
328} 343}
344
345static int fsnotify_mark_destroy(void *ignored)
346{
347 struct fsnotify_mark *mark, *next;
348 LIST_HEAD(private_destroy_list);
349
350 for (;;) {
351 spin_lock(&destroy_lock);
352 list_for_each_entry_safe(mark, next, &destroy_list, destroy_list) {
353 list_del(&mark->destroy_list);
354 list_add(&mark->destroy_list, &private_destroy_list);
355 }
356 spin_unlock(&destroy_lock);
357
358 synchronize_srcu(&fsnotify_mark_srcu);
359
360 list_for_each_entry_safe(mark, next, &private_destroy_list, destroy_list) {
361 list_del_init(&mark->destroy_list);
362 fsnotify_put_mark(mark);
363 }
364
365 wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
366 }
367
368 return 0;
369}
370
371static int __init fsnotify_mark_init(void)
372{
373 struct task_struct *thread;
374
375 thread = kthread_run(fsnotify_mark_destroy, NULL,
376 "fsnotify_mark");
377 if (IS_ERR(thread))
378 panic("unable to start fsnotify mark destruction thread.");
379
380 return 0;
381}
382device_initcall(fsnotify_mark_init);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 8e24cdf72928..84159390969f 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -302,6 +302,7 @@ struct fsnotify_mark {
302#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 302#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08
303#define FSNOTIFY_MARK_FLAG_ALIVE 0x10 303#define FSNOTIFY_MARK_FLAG_ALIVE 0x10
304 unsigned int flags; /* vfsmount or inode mark? */ 304 unsigned int flags; /* vfsmount or inode mark? */
305 struct list_head destroy_list;
305 void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ 306 void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */
306}; 307};
307 308