aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEric Paris <eparis@redhat.com>2009-12-17 21:24:27 -0500
committerEric Paris <eparis@redhat.com>2010-07-28 09:58:57 -0400
commit5444e2981c31d0ed7465475e451b8437084337e5 (patch)
tree66b6d84b7aab886b44a3467a139d258d9aba09df
parent32c3263221bd63316815286dccacdc7abfd7f3c4 (diff)
fsnotify: split generic and inode specific mark code
Currently all marking is done by functions in inode_mark.c. Some of this is pretty generic and should instead be done in a generic function (in the new mark.c), and we should only put the inode-specific code in inode_mark.c. Signed-off-by: Eric Paris <eparis@redhat.com>
-rw-r--r--fs/notify/Makefile3
-rw-r--r--fs/notify/dnotify/dnotify.c12
-rw-r--r--fs/notify/fanotify/fanotify.c2
-rw-r--r--fs/notify/fanotify/fanotify_user.c8
-rw-r--r--fs/notify/fsnotify.h7
-rw-r--r--fs/notify/inode_mark.c246
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c4
-rw-r--r--fs/notify/inotify/inotify_user.c4
-rw-r--r--fs/notify/mark.c294
-rw-r--r--include/linux/fsnotify_backend.h5
-rw-r--r--kernel/audit_tree.c8
-rw-r--r--kernel/audit_watch.c6
12 files changed, 347 insertions, 252 deletions
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 396a38779371..8f7f3b024a2e 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o 1obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o inode_mark.o \
2 mark.o
2 3
3obj-y += dnotify/ 4obj-y += dnotify/
4obj-y += inotify/ 5obj-y += inotify/
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index cac2eb896639..69f42df9ba45 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -95,7 +95,7 @@ static int dnotify_handle_event(struct fsnotify_group *group,
95 95
96 to_tell = event->to_tell; 96 to_tell = event->to_tell;
97 97
98 fsn_mark = fsnotify_find_mark(group, to_tell); 98 fsn_mark = fsnotify_find_inode_mark(group, to_tell);
99 if (unlikely(!fsn_mark)) 99 if (unlikely(!fsn_mark))
100 return 0; 100 return 0;
101 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); 101 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -143,14 +143,14 @@ static bool dnotify_should_send_event(struct fsnotify_group *group,
143 if (!S_ISDIR(inode->i_mode)) 143 if (!S_ISDIR(inode->i_mode))
144 return false; 144 return false;
145 145
146 fsn_mark = fsnotify_find_mark(group, inode); 146 fsn_mark = fsnotify_find_inode_mark(group, inode);
147 if (!fsn_mark) 147 if (!fsn_mark)
148 return false; 148 return false;
149 149
150 mask = (mask & ~FS_EVENT_ON_CHILD); 150 mask = (mask & ~FS_EVENT_ON_CHILD);
151 send = (mask & fsn_mark->mask); 151 send = (mask & fsn_mark->mask);
152 152
153 fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_mark */ 153 fsnotify_put_mark(fsn_mark); /* matches fsnotify_find_inode_mark */
154 154
155 return send; 155 return send;
156} 156}
@@ -193,7 +193,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
193 if (!S_ISDIR(inode->i_mode)) 193 if (!S_ISDIR(inode->i_mode))
194 return; 194 return;
195 195
196 fsn_mark = fsnotify_find_mark(dnotify_group, inode); 196 fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
197 if (!fsn_mark) 197 if (!fsn_mark)
198 return; 198 return;
199 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); 199 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
@@ -346,12 +346,12 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
346 mutex_lock(&dnotify_mark_mutex); 346 mutex_lock(&dnotify_mark_mutex);
347 347
348 /* add the new_fsn_mark or find an old one. */ 348 /* add the new_fsn_mark or find an old one. */
349 fsn_mark = fsnotify_find_mark(dnotify_group, inode); 349 fsn_mark = fsnotify_find_inode_mark(dnotify_group, inode);
350 if (fsn_mark) { 350 if (fsn_mark) {
351 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark); 351 dn_mark = container_of(fsn_mark, struct dnotify_mark, fsn_mark);
352 spin_lock(&fsn_mark->lock); 352 spin_lock(&fsn_mark->lock);
353 } else { 353 } else {
354 fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, 0); 354 fsnotify_add_mark(new_fsn_mark, dnotify_group, inode, NULL, 0);
355 spin_lock(&new_fsn_mark->lock); 355 spin_lock(&new_fsn_mark->lock);
356 fsn_mark = new_fsn_mark; 356 fsn_mark = new_fsn_mark;
357 dn_mark = new_dn_mark; 357 dn_mark = new_dn_mark;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 881067dc7923..aa5e92661142 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -118,7 +118,7 @@ static bool fanotify_should_send_event(struct fsnotify_group *group, struct inod
118 if (data_type != FSNOTIFY_EVENT_PATH) 118 if (data_type != FSNOTIFY_EVENT_PATH)
119 return false; 119 return false;
120 120
121 fsn_mark = fsnotify_find_mark(group, inode); 121 fsn_mark = fsnotify_find_inode_mark(group, inode);
122 if (!fsn_mark) 122 if (!fsn_mark)
123 return false; 123 return false;
124 124
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 66e38fc052b2..05351936a725 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -305,7 +305,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
305 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, 305 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
306 group, inode, mask); 306 group, inode, mask);
307 307
308 fsn_mark = fsnotify_find_mark(group, inode); 308 fsn_mark = fsnotify_find_inode_mark(group, inode);
309 if (!fsn_mark) 309 if (!fsn_mark)
310 return -ENOENT; 310 return -ENOENT;
311 311
@@ -321,7 +321,7 @@ static int fanotify_remove_mark(struct fsnotify_group *group,
321 321
322 fsnotify_recalc_group_mask(group); 322 fsnotify_recalc_group_mask(group);
323 323
324 /* matches the fsnotify_find_mark() */ 324 /* matches the fsnotify_find_inode_mark() */
325 fsnotify_put_mark(fsn_mark); 325 fsnotify_put_mark(fsn_mark);
326 326
327 return 0; 327 return 0;
@@ -338,7 +338,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
338 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, 338 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
339 group, inode, mask); 339 group, inode, mask);
340 340
341 fsn_mark = fsnotify_find_mark(group, inode); 341 fsn_mark = fsnotify_find_inode_mark(group, inode);
342 if (!fsn_mark) { 342 if (!fsn_mark) {
343 struct fsnotify_mark *new_fsn_mark; 343 struct fsnotify_mark *new_fsn_mark;
344 344
@@ -348,7 +348,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
348 goto out; 348 goto out;
349 349
350 fsnotify_init_mark(new_fsn_mark, fanotify_free_mark); 350 fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
351 ret = fsnotify_add_mark(new_fsn_mark, group, inode, 0); 351 ret = fsnotify_add_mark(new_fsn_mark, group, inode, NULL, 0);
352 if (ret) { 352 if (ret) {
353 fanotify_free_mark(new_fsn_mark); 353 fanotify_free_mark(new_fsn_mark);
354 goto out; 354 goto out;
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 2ba59158969f..7c7a904b802d 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -20,6 +20,11 @@ extern __u32 fsnotify_vfsmount_mask;
20/* destroy all events sitting in this groups notification queue */ 20/* destroy all events sitting in this groups notification queue */
21extern void fsnotify_flush_notify(struct fsnotify_group *group); 21extern void fsnotify_flush_notify(struct fsnotify_group *group);
22 22
23/* add a mark to an inode */
24extern int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
25 struct fsnotify_group *group, struct inode *inode,
26 int allow_dups);
27
23/* add a group to the inode group list */ 28/* add a group to the inode group list */
24extern void fsnotify_add_inode_group(struct fsnotify_group *group); 29extern void fsnotify_add_inode_group(struct fsnotify_group *group);
25/* add a group to the vfsmount group list */ 30/* add a group to the vfsmount group list */
@@ -27,6 +32,8 @@ extern void fsnotify_add_vfsmount_group(struct fsnotify_group *group);
27/* final kfree of a group */ 32/* final kfree of a group */
28extern void fsnotify_final_destroy_group(struct fsnotify_group *group); 33extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
29 34
35/* inode specific destruction of a mark */
36extern void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark);
30/* run the list of all marks associated with inode and flag them to be freed */ 37/* run the list of all marks associated with inode and flag them to be freed */
31extern void fsnotify_clear_marks_by_inode(struct inode *inode); 38extern void fsnotify_clear_marks_by_inode(struct inode *inode);
32/* 39/*
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index ba6f9833561b..c925579ba011 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -16,72 +16,6 @@
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */ 17 */
18 18
19/*
20 * fsnotify inode mark locking/lifetime/and refcnting
21 *
22 * REFCNT:
23 * The mark->refcnt tells how many "things" in the kernel currently are
24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriete locks, can take a reference
27 * and the object itself is guarenteed to survive until the reference is dropped.
28 *
29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
31 * be taken in order as follows:
32 *
33 * mark->lock
34 * group->mark_lock
35 * inode->i_lock
36 *
37 * mark->lock protects 2 things, mark->group and mark->inode. You must hold
38 * that lock to dereference either of these things (they could be NULL even with
39 * the lock)
40 *
41 * group->mark_lock protects the marks_list anchored inside a given group
42 * and each mark is hooked via the g_list. It also sorta protects the
43 * free_g_list, which when used is anchored by a private list on the stack of the
44 * task which held the group->mark_lock.
45 *
46 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
47 * given inode and each mark is hooked via the i_list. (and sorta the
48 * free_i_list)
49 *
50 *
51 * LIFETIME:
52 * Inode marks survive between when they are added to an inode and when their
53 * refcnt==0.
54 *
55 * The inode mark can be cleared for a number of different reasons including:
56 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
57 * - The inode is being evicted from cache. (fsnotify_inode_delete)
58 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
59 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
60 * - The fsnotify_group associated with the mark is going away and all such marks
61 * need to be cleaned up. (fsnotify_clear_marks_by_group)
62 *
63 * Worst case we are given an inode and need to clean up all the marks on that
64 * inode. We take i_lock and walk the i_fsnotify_marks safely. For each
65 * mark on the list we take a reference (so the mark can't disappear under us).
66 * We remove that mark form the inode's list of marks and we add this mark to a
67 * private list anchored on the stack using i_free_list; At this point we no
68 * longer fear anything finding the mark using the inode's list of marks.
69 *
70 * We can safely and locklessly run the private list on the stack of everything
71 * we just unattached from the original inode. For each mark on the private list
72 * we grab the mark-> and can thus dereference mark->group and mark->inode. If
73 * we see the group and inode are not NULL we take those locks. Now holding all
74 * 3 locks we can completely remove the mark from other tasks finding it in the
75 * future. Remember, 10 things might already be referencing this mark, but they
76 * better be holding a ref. We drop our reference we took before we unhooked it
77 * from the inode. When the ref hits 0 we can free the mark.
78 *
79 * Very similarly for freeing by group, except we use free_g_list.
80 *
81 * This has the very interesting property of being able to run concurrently with
82 * any (or all) other directions.
83 */
84
85#include <linux/fs.h> 19#include <linux/fs.h>
86#include <linux/init.h> 20#include <linux/init.h>
87#include <linux/kernel.h> 21#include <linux/kernel.h>
@@ -95,17 +29,6 @@
95#include <linux/fsnotify_backend.h> 29#include <linux/fsnotify_backend.h>
96#include "fsnotify.h" 30#include "fsnotify.h"
97 31
98void fsnotify_get_mark(struct fsnotify_mark *mark)
99{
100 atomic_inc(&mark->refcnt);
101}
102
103void fsnotify_put_mark(struct fsnotify_mark *mark)
104{
105 if (atomic_dec_and_test(&mark->refcnt))
106 mark->free_mark(mark);
107}
108
109/* 32/*
110 * Recalculate the mask of events relevant to a given inode locked. 33 * Recalculate the mask of events relevant to a given inode locked.
111 */ 34 */
@@ -135,44 +58,18 @@ void fsnotify_recalc_inode_mask(struct inode *inode)
135 __fsnotify_update_child_dentry_flags(inode); 58 __fsnotify_update_child_dentry_flags(inode);
136} 59}
137 60
138/* 61void fsnotify_destroy_inode_mark(struct fsnotify_mark *mark)
139 * Any time a mark is getting freed we end up here.
140 * The caller had better be holding a reference to this mark so we don't actually
141 * do the final put under the mark->lock
142 */
143void fsnotify_destroy_mark(struct fsnotify_mark *mark)
144{ 62{
145 struct fsnotify_group *group; 63 struct inode *inode = mark->i.inode;
146 struct inode *inode;
147
148 spin_lock(&mark->lock);
149 64
150 group = mark->group; 65 assert_spin_locked(&mark->lock);
151 inode = mark->i.inode; 66 assert_spin_locked(&mark->group->mark_lock);
152 67
153 BUG_ON(group && !inode);
154 BUG_ON(!group && inode);
155
156 /* if !group something else already marked this to die */
157 if (!group) {
158 spin_unlock(&mark->lock);
159 return;
160 }
161
162 /* 1 from caller and 1 for being on i_list/g_list */
163 BUG_ON(atomic_read(&mark->refcnt) < 2);
164
165 spin_lock(&group->mark_lock);
166 spin_lock(&inode->i_lock); 68 spin_lock(&inode->i_lock);
167 69
168 hlist_del_init(&mark->i.i_list); 70 hlist_del_init(&mark->i.i_list);
169 mark->i.inode = NULL; 71 mark->i.inode = NULL;
170 72
171 list_del_init(&mark->g_list);
172 mark->group = NULL;
173
174 fsnotify_put_mark(mark); /* for i_list and g_list */
175
176 /* 73 /*
177 * this mark is now off the inode->i_fsnotify_marks list and we 74 * this mark is now off the inode->i_fsnotify_marks list and we
178 * hold the inode->i_lock, so this is the perfect time to update the 75 * hold the inode->i_lock, so this is the perfect time to update the
@@ -181,61 +78,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
181 fsnotify_recalc_inode_mask_locked(inode); 78 fsnotify_recalc_inode_mask_locked(inode);
182 79
183 spin_unlock(&inode->i_lock); 80 spin_unlock(&inode->i_lock);
184 spin_unlock(&group->mark_lock);
185 spin_unlock(&mark->lock);
186
187 /*
188 * Some groups like to know that marks are being freed. This is a
189 * callback to the group function to let it know that this mark
190 * is being freed.
191 */
192 if (group->ops->freeing_mark)
193 group->ops->freeing_mark(mark, group);
194
195 /*
196 * __fsnotify_update_child_dentry_flags(inode);
197 *
198 * I really want to call that, but we can't, we have no idea if the inode
199 * still exists the second we drop the mark->lock.
200 *
201 * The next time an event arrive to this inode from one of it's children
202 * __fsnotify_parent will see that the inode doesn't care about it's
203 * children and will update all of these flags then. So really this
204 * is just a lazy update (and could be a perf win...)
205 */
206
207
208 iput(inode);
209
210 /*
211 * it's possible that this group tried to destroy itself, but this
212 * this mark was simultaneously being freed by inode. If that's the
213 * case, we finish freeing the group here.
214 */
215 if (unlikely(atomic_dec_and_test(&group->num_marks)))
216 fsnotify_final_destroy_group(group);
217}
218
219/*
220 * Given a group, destroy all of the marks associated with that group.
221 */
222void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
223{
224 struct fsnotify_mark *lmark, *mark;
225 LIST_HEAD(free_list);
226
227 spin_lock(&group->mark_lock);
228 list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
229 list_add(&mark->free_g_list, &free_list);
230 list_del_init(&mark->g_list);
231 fsnotify_get_mark(mark);
232 }
233 spin_unlock(&group->mark_lock);
234
235 list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
236 fsnotify_destroy_mark(mark);
237 fsnotify_put_mark(mark);
238 }
239} 81}
240 82
241/* 83/*
@@ -261,8 +103,12 @@ void fsnotify_clear_marks_by_inode(struct inode *inode)
261 } 103 }
262} 104}
263 105
264static struct fsnotify_mark *fsnotify_find_mark_locked(struct fsnotify_group *group, 106/*
265 struct inode *inode) 107 * given a group and inode, find the mark associated with that combination.
108 * if found take a reference to that mark and return it, else return NULL
109 */
110struct fsnotify_mark *fsnotify_find_inode_mark_locked(struct fsnotify_group *group,
111 struct inode *inode)
266{ 112{
267 struct fsnotify_mark *mark; 113 struct fsnotify_mark *mark;
268 struct hlist_node *pos; 114 struct hlist_node *pos;
@@ -282,50 +128,26 @@ static struct fsnotify_mark *fsnotify_find_mark_locked(struct fsnotify_group *gr
282 * given a group and inode, find the mark associated with that combination. 128 * given a group and inode, find the mark associated with that combination.
283 * if found take a reference to that mark and return it, else return NULL 129 * if found take a reference to that mark and return it, else return NULL
284 */ 130 */
285struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group, 131struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group,
286 struct inode *inode) 132 struct inode *inode)
287{ 133{
288 struct fsnotify_mark *mark; 134 struct fsnotify_mark *mark;
289 135
290 spin_lock(&inode->i_lock); 136 spin_lock(&inode->i_lock);
291 mark = fsnotify_find_mark_locked(group, inode); 137 mark = fsnotify_find_inode_mark_locked(group, inode);
292 spin_unlock(&inode->i_lock); 138 spin_unlock(&inode->i_lock);
293 139
294 return mark; 140 return mark;
295} 141}
296 142
297void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
298{
299 assert_spin_locked(&old->lock);
300 new->i.inode = old->i.inode;
301 new->group = old->group;
302 new->mask = old->mask;
303 new->free_mark = old->free_mark;
304}
305
306/*
307 * Nothing fancy, just initialize lists and locks and counters.
308 */
309void fsnotify_init_mark(struct fsnotify_mark *mark,
310 void (*free_mark)(struct fsnotify_mark *mark))
311{
312 spin_lock_init(&mark->lock);
313 atomic_set(&mark->refcnt, 1);
314 INIT_HLIST_NODE(&mark->i.i_list);
315 mark->group = NULL;
316 mark->mask = 0;
317 mark->i.inode = NULL;
318 mark->free_mark = free_mark;
319}
320
321/* 143/*
322 * Attach an initialized mark to a given group and inode. 144 * Attach an initialized mark to a given group and inode.
323 * These marks may be used for the fsnotify backend to determine which 145 * These marks may be used for the fsnotify backend to determine which
324 * event types should be delivered to which group and for which inodes. 146 * event types should be delivered to which group and for which inodes.
325 */ 147 */
326int fsnotify_add_mark(struct fsnotify_mark *mark, 148int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
327 struct fsnotify_group *group, struct inode *inode, 149 struct fsnotify_group *group, struct inode *inode,
328 int allow_dups) 150 int allow_dups)
329{ 151{
330 struct fsnotify_mark *lmark = NULL; 152 struct fsnotify_mark *lmark = NULL;
331 int ret = 0; 153 int ret = 0;
@@ -336,56 +158,26 @@ int fsnotify_add_mark(struct fsnotify_mark *mark,
336 158
337 mark->flags = FSNOTIFY_MARK_FLAG_INODE; 159 mark->flags = FSNOTIFY_MARK_FLAG_INODE;
338 160
339 /* 161 assert_spin_locked(&mark->lock);
340 * if this group isn't being testing for inode type events we need 162 assert_spin_locked(&group->mark_lock);
341 * to start testing
342 */
343 if (unlikely(list_empty(&group->inode_group_list)))
344 fsnotify_add_inode_group(group);
345 /*
346 * XXX This is where we could also do the fsnotify_add_vfsmount_group
347 * if we are setting and vfsmount mark....
348
349 if (unlikely(list_empty(&group->vfsmount_group_list)))
350 fsnotify_add_vfsmount_group(group);
351 */
352 163
353 /*
354 * LOCKING ORDER!!!!
355 * mark->lock
356 * group->mark_lock
357 * inode->i_lock
358 */
359 spin_lock(&mark->lock);
360 spin_lock(&group->mark_lock);
361 spin_lock(&inode->i_lock); 164 spin_lock(&inode->i_lock);
362 165
363 if (!allow_dups) 166 if (!allow_dups)
364 lmark = fsnotify_find_mark_locked(group, inode); 167 lmark = fsnotify_find_inode_mark_locked(group, inode);
365 if (!lmark) { 168 if (!lmark) {
366 mark->group = group;
367 mark->i.inode = inode; 169 mark->i.inode = inode;
368 170
369 hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks); 171 hlist_add_head(&mark->i.i_list, &inode->i_fsnotify_marks);
370 list_add(&mark->g_list, &group->marks_list);
371
372 fsnotify_get_mark(mark); /* for i_list and g_list */
373
374 atomic_inc(&group->num_marks);
375 172
376 fsnotify_recalc_inode_mask_locked(inode); 173 fsnotify_recalc_inode_mask_locked(inode);
377 } 174 }
378 175
379 spin_unlock(&inode->i_lock); 176 spin_unlock(&inode->i_lock);
380 spin_unlock(&group->mark_lock);
381 spin_unlock(&mark->lock);
382 177
383 if (lmark) { 178 if (lmark) {
384 ret = -EEXIST; 179 ret = -EEXIST;
385 iput(inode); 180 iput(inode);
386 fsnotify_put_mark(lmark);
387 } else {
388 __fsnotify_update_child_dentry_flags(inode);
389 } 181 }
390 182
391 return ret; 183 return ret;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index cc8f6bcbb4a3..1d237e1bf7b1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -97,7 +97,7 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
97 97
98 to_tell = event->to_tell; 98 to_tell = event->to_tell;
99 99
100 fsn_mark = fsnotify_find_mark(group, to_tell); 100 fsn_mark = fsnotify_find_inode_mark(group, to_tell);
101 /* race with watch removal? We already passed should_send */ 101 /* race with watch removal? We already passed should_send */
102 if (unlikely(!fsn_mark)) 102 if (unlikely(!fsn_mark))
103 return 0; 103 return 0;
@@ -145,7 +145,7 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
145 struct fsnotify_mark *fsn_mark; 145 struct fsnotify_mark *fsn_mark;
146 bool send; 146 bool send;
147 147
148 fsn_mark = fsnotify_find_mark(group, inode); 148 fsn_mark = fsnotify_find_inode_mark(group, inode);
149 if (!fsn_mark) 149 if (!fsn_mark)
150 return false; 150 return false;
151 151
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index ad5a1ea7827e..a12315a7553d 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -566,7 +566,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
566 if (unlikely(!mask)) 566 if (unlikely(!mask))
567 return -EINVAL; 567 return -EINVAL;
568 568
569 fsn_mark = fsnotify_find_mark(group, inode); 569 fsn_mark = fsnotify_find_inode_mark(group, inode);
570 if (!fsn_mark) 570 if (!fsn_mark)
571 return -ENOENT; 571 return -ENOENT;
572 572
@@ -644,7 +644,7 @@ static int inotify_new_watch(struct fsnotify_group *group,
644 goto out_err; 644 goto out_err;
645 645
646 /* we are on the idr, now get on the inode */ 646 /* we are on the idr, now get on the inode */
647 ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, 0); 647 ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
648 if (ret) { 648 if (ret) {
649 /* we failed to get on the inode, get off the idr */ 649 /* we failed to get on the inode, get off the idr */
650 inotify_remove_from_idr(group, tmp_i_mark); 650 inotify_remove_from_idr(group, tmp_i_mark);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
new file mode 100644
index 000000000000..e56e8768d676
--- /dev/null
+++ b/fs/notify/mark.c
@@ -0,0 +1,294 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19/*
20 * fsnotify inode mark locking/lifetime/and refcnting
21 *
22 * REFCNT:
23 * The mark->refcnt tells how many "things" in the kernel currently are
24 * referencing this object. The object typically will live inside the kernel
25 * with a refcnt of 2, one for each list it is on (i_list, g_list). Any task
26 * which can find this object holding the appropriate locks, can take a reference
27 * and the object itself is guaranteed to survive until the reference is dropped.
28 *
29 * LOCKING:
30 * There are 3 spinlocks involved with fsnotify inode marks and they MUST
31 * be taken in order as follows:
32 *
33 * mark->lock
34 * group->mark_lock
35 * inode->i_lock
36 *
37 * mark->lock protects 2 things, mark->group and mark->inode. You must hold
38 * that lock to dereference either of these things (they could be NULL even with
39 * the lock)
40 *
41 * group->mark_lock protects the marks_list anchored inside a given group
42 * and each mark is hooked via the g_list. It also sorta protects the
43 * free_g_list, which when used is anchored by a private list on the stack of the
44 * task which held the group->mark_lock.
45 *
46 * inode->i_lock protects the i_fsnotify_marks list anchored inside a
47 * given inode and each mark is hooked via the i_list. (and sorta the
48 * free_i_list)
49 *
50 *
51 * LIFETIME:
52 * Inode marks survive between when they are added to an inode and when their
53 * refcnt==0.
54 *
55 * The inode mark can be cleared for a number of different reasons including:
56 * - The inode is unlinked for the last time. (fsnotify_inode_remove)
57 * - The inode is being evicted from cache. (fsnotify_inode_delete)
58 * - The fs the inode is on is unmounted. (fsnotify_inode_delete/fsnotify_unmount_inodes)
59 * - Something explicitly requests that it be removed. (fsnotify_destroy_mark)
60 * - The fsnotify_group associated with the mark is going away and all such marks
61 * need to be cleaned up. (fsnotify_clear_marks_by_group)
62 *
63 * Worst case we are given an inode and need to clean up all the marks on that
64 * inode. We take i_lock and walk the i_fsnotify_marks safely. For each
65 * mark on the list we take a reference (so the mark can't disappear under us).
66 * We remove that mark from the inode's list of marks and we add this mark to a
67 * private list anchored on the stack using free_i_list; at this point we no
68 * longer fear anything finding the mark using the inode's list of marks.
69 *
70 * We can safely and locklessly run the private list on the stack of everything
71 * we just unattached from the original inode. For each mark on the private list
72 * we grab the mark->lock and can thus dereference mark->group and mark->inode. If
73 * we see the group and inode are not NULL we take those locks. Now holding all
74 * 3 locks we can completely remove the mark from other tasks finding it in the
75 * future. Remember, 10 things might already be referencing this mark, but they
76 * better be holding a ref. We drop our reference we took before we unhooked it
77 * from the inode. When the ref hits 0 we can free the mark.
78 *
79 * Very similarly for freeing by group, except we use free_g_list.
80 *
81 * This has the very interesting property of being able to run concurrently with
82 * any (or all) other directions.
83 */
84
85#include <linux/fs.h>
86#include <linux/init.h>
87#include <linux/kernel.h>
88#include <linux/module.h>
89#include <linux/mutex.h>
90#include <linux/slab.h>
91#include <linux/spinlock.h>
92#include <linux/writeback.h> /* for inode_lock */
93
94#include <asm/atomic.h>
95
96#include <linux/fsnotify_backend.h>
97#include "fsnotify.h"
98
99void fsnotify_get_mark(struct fsnotify_mark *mark)
100{
101 atomic_inc(&mark->refcnt);
102}
103
104void fsnotify_put_mark(struct fsnotify_mark *mark)
105{
106 if (atomic_dec_and_test(&mark->refcnt))
107 mark->free_mark(mark);
108}
109
110/*
111 * Any time a mark is getting freed we end up here.
112 * The caller had better be holding a reference to this mark so we don't actually
113 * do the final put under the mark->lock
114 */
115void fsnotify_destroy_mark(struct fsnotify_mark *mark)
116{
117 struct fsnotify_group *group;
118 struct inode *inode;
119
120 spin_lock(&mark->lock);
121
122 group = mark->group;
123 inode = mark->i.inode;
124
125 BUG_ON(group && !inode);
126 BUG_ON(!group && inode);
127
128 /* if !group something else already marked this to die */
129 if (!group) {
130 spin_unlock(&mark->lock);
131 return;
132 }
133
134 /* 1 from caller and 1 for being on i_list/g_list */
135 BUG_ON(atomic_read(&mark->refcnt) < 2);
136
137 spin_lock(&group->mark_lock);
138
139 if (mark->flags & FSNOTIFY_MARK_FLAG_INODE)
140 fsnotify_destroy_inode_mark(mark);
141 else
142 BUG();
143
144 list_del_init(&mark->g_list);
145 mark->group = NULL;
146
147 fsnotify_put_mark(mark); /* for i_list and g_list */
148
149 spin_unlock(&group->mark_lock);
150 spin_unlock(&mark->lock);
151
152 /*
153 * Some groups like to know that marks are being freed. This is a
154 * callback to the group function to let it know that this mark
155 * is being freed.
156 */
157 if (group->ops->freeing_mark)
158 group->ops->freeing_mark(mark, group);
159
160 /*
161 * __fsnotify_update_child_dentry_flags(inode);
162 *
163 * I really want to call that, but we can't, we have no idea if the inode
164 * still exists the second we drop the mark->lock.
165 *
166 * The next time an event arrives at this inode from one of its children
167 * __fsnotify_parent will see that the inode doesn't care about its
168 * children and will update all of these flags then. So really this
169 * is just a lazy update (and could be a perf win...)
170 */
171
172
173 iput(inode);
174
175 /*
176 * it's possible that this group tried to destroy itself, but this
177 * this mark was simultaneously being freed by inode. If that's the
178 * case, we finish freeing the group here.
179 */
180 if (unlikely(atomic_dec_and_test(&group->num_marks)))
181 fsnotify_final_destroy_group(group);
182}
183
184/*
185 * Attach an initialized mark to a given group and fs object.
186 * These marks may be used for the fsnotify backend to determine which
187 * event types should be delivered to which group.
188 */
189int fsnotify_add_mark(struct fsnotify_mark *mark,
190 struct fsnotify_group *group, struct inode *inode,
191 struct vfsmount *mnt, int allow_dups)
192{
193 int ret = 0;
194
195 BUG_ON(mnt);
196 BUG_ON(inode && mnt);
197 BUG_ON(!inode && !mnt);
198
199 /*
200 * if this group isn't being tested for inode type events we need
201 * to start testing
202 */
203 if (inode && unlikely(list_empty(&group->inode_group_list)))
204 fsnotify_add_inode_group(group);
205 else if (mnt && unlikely(list_empty(&group->vfsmount_group_list)))
206 fsnotify_add_vfsmount_group(group);
207
208 /*
209 * LOCKING ORDER!!!!
210 * mark->lock
211 * group->mark_lock
212 * inode->i_lock
213 */
214 spin_lock(&mark->lock);
215 spin_lock(&group->mark_lock);
216
217 mark->group = group;
218 list_add(&mark->g_list, &group->marks_list);
219 atomic_inc(&group->num_marks);
220 fsnotify_get_mark(mark); /* for i_list and g_list */
221
222 if (inode) {
223 ret = fsnotify_add_inode_mark(mark, group, inode, allow_dups);
224 if (ret)
225 goto err;
226 } else {
227 BUG();
228 }
229
230 spin_unlock(&group->mark_lock);
231 spin_unlock(&mark->lock);
232
233 if (inode)
234 __fsnotify_update_child_dentry_flags(inode);
235
236 return ret;
237err:
238 mark->group = NULL;
239 list_del_init(&mark->g_list);
240 atomic_dec(&group->num_marks);
241 fsnotify_put_mark(mark);
242
243 spin_unlock(&group->mark_lock);
244 spin_unlock(&mark->lock);
245
246 return ret;
247}
248
249/*
250 * Given a group, destroy all of the marks associated with that group.
251 */
252void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
253{
254 struct fsnotify_mark *lmark, *mark;
255 LIST_HEAD(free_list);
256
257 spin_lock(&group->mark_lock);
258 list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
259 list_add(&mark->free_g_list, &free_list);
260 list_del_init(&mark->g_list);
261 fsnotify_get_mark(mark);
262 }
263 spin_unlock(&group->mark_lock);
264
265 list_for_each_entry_safe(mark, lmark, &free_list, free_g_list) {
266 fsnotify_destroy_mark(mark);
267 fsnotify_put_mark(mark);
268 }
269}
270
271void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old)
272{
273 assert_spin_locked(&old->lock);
274 new->i.inode = old->i.inode;
275 new->m.mnt = old->m.mnt;
276 new->group = old->group;
277 new->mask = old->mask;
278 new->free_mark = old->free_mark;
279}
280
281/*
282 * Nothing fancy, just initialize lists and locks and counters.
283 */
284void fsnotify_init_mark(struct fsnotify_mark *mark,
285 void (*free_mark)(struct fsnotify_mark *mark))
286{
287 spin_lock_init(&mark->lock);
288 atomic_set(&mark->refcnt, 1);
289 INIT_HLIST_NODE(&mark->i.i_list);
290 mark->group = NULL;
291 mark->mask = 0;
292 mark->i.inode = NULL;
293 mark->free_mark = free_mark;
294}
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 7d93572ec568..27cccbecbf23 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -364,11 +364,12 @@ extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group
364extern void fsnotify_recalc_inode_mask(struct inode *inode); 364extern void fsnotify_recalc_inode_mask(struct inode *inode);
365extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); 365extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark));
366/* find (and take a reference) to a mark associated with group and inode */ 366/* find (and take a reference) to a mark associated with group and inode */
367extern struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_group *group, struct inode *inode); 367extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode);
368/* copy the values from old into new */ 368/* copy the values from old into new */
369extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old); 369extern void fsnotify_duplicate_mark(struct fsnotify_mark *new, struct fsnotify_mark *old);
370/* attach the mark to both the group and the inode */ 370/* attach the mark to both the group and the inode */
371extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, int allow_dups); 371extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
372 struct inode *inode, struct vfsmount *mnt, int allow_dups);
372/* given a mark, flag it to be freed when all references are dropped */ 373/* given a mark, flag it to be freed when all references are dropped */
373extern void fsnotify_destroy_mark(struct fsnotify_mark *mark); 374extern void fsnotify_destroy_mark(struct fsnotify_mark *mark);
374/* run all the marks in a group, and flag them to be freed */ 375/* run all the marks in a group, and flag them to be freed */
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 80f8ac328aad..cfb97d752a61 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -259,7 +259,7 @@ static void untag_chunk(struct node *p)
259 if (!new) 259 if (!new)
260 goto Fallback; 260 goto Fallback;
261 fsnotify_duplicate_mark(&new->mark, entry); 261 fsnotify_duplicate_mark(&new->mark, entry);
262 if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, 1)) { 262 if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
263 free_chunk(new); 263 free_chunk(new);
264 goto Fallback; 264 goto Fallback;
265 } 265 }
@@ -322,7 +322,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
322 return -ENOMEM; 322 return -ENOMEM;
323 323
324 entry = &chunk->mark; 324 entry = &chunk->mark;
325 if (fsnotify_add_mark(entry, audit_tree_group, inode, 0)) { 325 if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
326 free_chunk(chunk); 326 free_chunk(chunk);
327 return -ENOSPC; 327 return -ENOSPC;
328 } 328 }
@@ -360,7 +360,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
360 struct node *p; 360 struct node *p;
361 int n; 361 int n;
362 362
363 old_entry = fsnotify_find_mark(audit_tree_group, inode); 363 old_entry = fsnotify_find_inode_mark(audit_tree_group, inode);
364 if (!old_entry) 364 if (!old_entry)
365 return create_chunk(inode, tree); 365 return create_chunk(inode, tree);
366 366
@@ -395,7 +395,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
395 } 395 }
396 396
397 fsnotify_duplicate_mark(chunk_entry, old_entry); 397 fsnotify_duplicate_mark(chunk_entry, old_entry);
398 if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, 1)) { 398 if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
399 spin_unlock(&old_entry->lock); 399 spin_unlock(&old_entry->lock);
400 free_chunk(chunk); 400 free_chunk(chunk);
401 fsnotify_put_mark(old_entry); 401 fsnotify_put_mark(old_entry);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index d85fa538a722..7499397a6100 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -101,7 +101,7 @@ static inline struct audit_parent *audit_find_parent(struct inode *inode)
101 struct audit_parent *parent = NULL; 101 struct audit_parent *parent = NULL;
102 struct fsnotify_mark *entry; 102 struct fsnotify_mark *entry;
103 103
104 entry = fsnotify_find_mark(audit_watch_group, inode); 104 entry = fsnotify_find_inode_mark(audit_watch_group, inode);
105 if (entry) 105 if (entry)
106 parent = container_of(entry, struct audit_parent, mark); 106 parent = container_of(entry, struct audit_parent, mark);
107 107
@@ -158,7 +158,7 @@ static struct audit_parent *audit_init_parent(struct nameidata *ndp)
158 158
159 fsnotify_init_mark(&parent->mark, audit_watch_free_mark); 159 fsnotify_init_mark(&parent->mark, audit_watch_free_mark);
160 parent->mark.mask = AUDIT_FS_WATCH; 160 parent->mark.mask = AUDIT_FS_WATCH;
161 ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, 0); 161 ret = fsnotify_add_mark(&parent->mark, audit_watch_group, inode, NULL, 0);
162 if (ret < 0) { 162 if (ret < 0) {
163 audit_free_parent(parent); 163 audit_free_parent(parent);
164 return ERR_PTR(ret); 164 return ERR_PTR(ret);
@@ -517,7 +517,7 @@ static bool audit_watch_should_send_event(struct fsnotify_group *group, struct i
517 struct fsnotify_mark *entry; 517 struct fsnotify_mark *entry;
518 bool send; 518 bool send;
519 519
520 entry = fsnotify_find_mark(group, inode); 520 entry = fsnotify_find_inode_mark(group, inode);
521 if (!entry) 521 if (!entry)
522 return false; 522 return false;
523 523