aboutsummaryrefslogtreecommitdiffstats
path: root/fs/notify/fsnotify.c
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2014-01-21 18:48:14 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-01-21 19:19:41 -0500
commit: 7053aee26a3548ebaba046ae2e52396ccf56ac6c (patch)
tree: 1d21fa9409fede7b908ac08df2984766120448db /fs/notify/fsnotify.c
parent: e9fe69045bd648d75d8d8099b8658a4ee005a8e5 (diff)
fsnotify: do not share events between notification groups
Currently the fsnotify framework creates one event structure for each notification event and links this event into all interested notification groups. This is done so that we save memory when several notification groups are interested in the event. However, the need for an event structure shared between inotify & fanotify bloats the event structure, so the result is often higher memory consumption.

Another problem is that the fsnotify framework keeps path references with outstanding events so that fanotify can return open file descriptors with its events. This has the undesirable effect that a filesystem cannot be unmounted while there are outstanding events - a regression for inotify compared to the situation before it was converted to the fsnotify framework. For fanotify this problem is hard to avoid, and users of fanotify should expect this behavior when they ask for file descriptors from notified files.

This patch changes fsnotify and its users to create a separate event structure for each group. This allows for much simpler code (~400 lines removed by this patch) and also smaller event structures. For example, on a 64-bit system the original struct fsnotify_event consumes 120 bytes, plus additional space for the file name, an additional 24 bytes for the second and each subsequent group linking the event, and an additional 32 bytes for each inotify group for private data. After the conversion an inotify event consumes 48 bytes plus space for the file name, which is considerably less memory unless file names are long and there are several groups interested in the events (both of which are uncommon). A fanotify event fits in 56 bytes after the conversion (fanotify doesn't care about file names, so its events don't need to have one allocated). This is a win unless there are four or more fanotify groups interested in the event.

The conversion also solves the problem with unmount when only inotify is used, as we don't have to grab path references for inotify events.
[hughd@google.com: fanotify: fix corruption preventing startup]
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Eric Paris <eparis@parisplace.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/notify/fsnotify.c')
-rw-r--r-- fs/notify/fsnotify.c | 37
1 files changed, 12 insertions, 25 deletions
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4bb21d67d9b1..7c754c91c3f6 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
128 struct fsnotify_mark *vfsmount_mark, 128 struct fsnotify_mark *vfsmount_mark,
129 __u32 mask, void *data, 129 __u32 mask, void *data,
130 int data_is, u32 cookie, 130 int data_is, u32 cookie,
131 const unsigned char *file_name, 131 const unsigned char *file_name)
132 struct fsnotify_event **event)
133{ 132{
134 struct fsnotify_group *group = NULL; 133 struct fsnotify_group *group = NULL;
135 __u32 inode_test_mask = 0; 134 __u32 inode_test_mask = 0;
@@ -170,10 +169,10 @@ static int send_to_group(struct inode *to_tell,
170 169
171 pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" 170 pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
172 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" 171 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
173 " data=%p data_is=%d cookie=%d event=%p\n", 172 " data=%p data_is=%d cookie=%d\n",
174 __func__, group, to_tell, mask, inode_mark, 173 __func__, group, to_tell, mask, inode_mark,
175 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, 174 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
176 data_is, cookie, *event); 175 data_is, cookie);
177 176
178 if (!inode_test_mask && !vfsmount_test_mask) 177 if (!inode_test_mask && !vfsmount_test_mask)
179 return 0; 178 return 0;
@@ -183,14 +182,9 @@ static int send_to_group(struct inode *to_tell,
183 data_is) == false) 182 data_is) == false)
184 return 0; 183 return 0;
185 184
186 if (!*event) { 185 return group->ops->handle_event(group, to_tell, inode_mark,
187 *event = fsnotify_create_event(to_tell, mask, data, 186 vfsmount_mark, mask, data, data_is,
188 data_is, file_name, 187 file_name);
189 cookie, GFP_KERNEL);
190 if (!*event)
191 return -ENOMEM;
192 }
193 return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
194} 188}
195 189
196/* 190/*
@@ -205,7 +199,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
205 struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; 199 struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
206 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; 200 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
207 struct fsnotify_group *inode_group, *vfsmount_group; 201 struct fsnotify_group *inode_group, *vfsmount_group;
208 struct fsnotify_event *event = NULL;
209 struct mount *mnt; 202 struct mount *mnt;
210 int idx, ret = 0; 203 int idx, ret = 0;
211 /* global tests shouldn't care about events on child only the specific event */ 204 /* global tests shouldn't care about events on child only the specific event */
@@ -258,18 +251,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
258 251
259 if (inode_group > vfsmount_group) { 252 if (inode_group > vfsmount_group) {
260 /* handle inode */ 253 /* handle inode */
261 ret = send_to_group(to_tell, inode_mark, NULL, mask, data, 254 ret = send_to_group(to_tell, inode_mark, NULL, mask,
262 data_is, cookie, file_name, &event); 255 data, data_is, cookie, file_name);
263 /* we didn't use the vfsmount_mark */ 256 /* we didn't use the vfsmount_mark */
264 vfsmount_group = NULL; 257 vfsmount_group = NULL;
265 } else if (vfsmount_group > inode_group) { 258 } else if (vfsmount_group > inode_group) {
266 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, 259 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
267 data_is, cookie, file_name, &event); 260 data, data_is, cookie, file_name);
268 inode_group = NULL; 261 inode_group = NULL;
269 } else { 262 } else {
270 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, 263 ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
271 mask, data, data_is, cookie, file_name, 264 mask, data, data_is, cookie,
272 &event); 265 file_name);
273 } 266 }
274 267
275 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) 268 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
@@ -285,12 +278,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
285 ret = 0; 278 ret = 0;
286out: 279out:
287 srcu_read_unlock(&fsnotify_mark_srcu, idx); 280 srcu_read_unlock(&fsnotify_mark_srcu, idx);
288 /*
289 * fsnotify_create_event() took a reference so the event can't be cleaned
290 * up while we are still trying to add it to lists, drop that one.
291 */
292 if (event)
293 fsnotify_put_event(event);
294 281
295 return ret; 282 return ret;
296} 283}