about summary refs log tree commit diff stats
path: root/include
diff options
context:
space:
mode:
author Jan Kara <jack@suse.cz> 2014-01-21 18:48:14 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-01-21 19:19:41 -0500
commit 7053aee26a3548ebaba046ae2e52396ccf56ac6c (patch)
tree 1d21fa9409fede7b908ac08df2984766120448db /include
parent e9fe69045bd648d75d8d8099b8658a4ee005a8e5 (diff)
fsnotify: do not share events between notification groups
Currently the fsnotify framework creates one event structure for each notification event and links this event into all interested notification groups. This is done so that we save memory when several notification groups are interested in the event. However, the need for an event structure shared between inotify & fanotify bloats the event structure, so the result is often higher memory consumption. Another problem is that the fsnotify framework keeps path references with outstanding events so that fanotify can return open file descriptors with its events. This has the undesirable effect that a filesystem cannot be unmounted while there are outstanding events - a regression for inotify compared to the situation before it was converted to the fsnotify framework. For fanotify this problem is hard to avoid, and users of fanotify should kind of expect this behavior when they ask for file descriptors from notified files. This patch changes fsnotify and its users to create a separate event structure for each group. This allows for much simpler code (~400 lines removed by this patch) and also smaller event structures. For example, on a 64-bit system the original struct fsnotify_event consumes 120 bytes, plus additional space for the file name, an additional 24 bytes for the second and each subsequent group linking the event, and an additional 32 bytes for each inotify group for private data. After the conversion an inotify event consumes 48 bytes plus space for the file name, which is considerably less memory unless file names are long and there are several groups interested in the events (both of which are uncommon). A fanotify event fits in 56 bytes after the conversion (fanotify doesn't care about file names, so its events don't have to have one allocated). This is a win unless there are four or more fanotify groups interested in the event. The conversion also solves the problem with unmount when only inotify is used, as we don't have to grab path references for inotify events.
[hughd@google.com: fanotify: fix corruption preventing startup] Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Eric Paris <eparis@parisplace.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include')
-rw-r--r-- include/linux/fsnotify_backend.h 114
1 files changed, 27 insertions, 87 deletions
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 4b2ee8d12f5e..7f3d7dcfcd00 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -15,7 +15,6 @@
15#include <linux/path.h> /* struct path */ 15#include <linux/path.h> /* struct path */
16#include <linux/spinlock.h> 16#include <linux/spinlock.h>
17#include <linux/types.h> 17#include <linux/types.h>
18
19#include <linux/atomic.h> 18#include <linux/atomic.h>
20 19
21/* 20/*
@@ -79,6 +78,7 @@ struct fsnotify_group;
79struct fsnotify_event; 78struct fsnotify_event;
80struct fsnotify_mark; 79struct fsnotify_mark;
81struct fsnotify_event_private_data; 80struct fsnotify_event_private_data;
81struct fsnotify_fname;
82 82
83/* 83/*
84 * Each group must define these ops. The fsnotify infrastructure will call 84 * Each group must define these ops. The fsnotify infrastructure will call
@@ -99,12 +99,26 @@ struct fsnotify_ops {
99 struct fsnotify_mark *vfsmount_mark, 99 struct fsnotify_mark *vfsmount_mark,
100 __u32 mask, void *data, int data_type); 100 __u32 mask, void *data, int data_type);
101 int (*handle_event)(struct fsnotify_group *group, 101 int (*handle_event)(struct fsnotify_group *group,
102 struct inode *inode,
102 struct fsnotify_mark *inode_mark, 103 struct fsnotify_mark *inode_mark,
103 struct fsnotify_mark *vfsmount_mark, 104 struct fsnotify_mark *vfsmount_mark,
104 struct fsnotify_event *event); 105 u32 mask, void *data, int data_type,
106 const unsigned char *file_name);
105 void (*free_group_priv)(struct fsnotify_group *group); 107 void (*free_group_priv)(struct fsnotify_group *group);
106 void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); 108 void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group);
107 void (*free_event_priv)(struct fsnotify_event_private_data *priv); 109 void (*free_event)(struct fsnotify_event *event);
110};
111
112/*
113 * all of the information about the original object we want to now send to
114 * a group. If you want to carry more info from the accessing task to the
115 * listener this structure is where you need to be adding fields.
116 */
117struct fsnotify_event {
118 struct list_head list;
119 /* inode may ONLY be dereferenced during handle_event(). */
120 struct inode *inode; /* either the inode the event happened to or its parent */
121 u32 mask; /* the type of access, bitwise OR for FS_* event types */
108}; 122};
109 123
110/* 124/*
@@ -148,7 +162,11 @@ struct fsnotify_group {
148 * a group */ 162 * a group */
149 struct list_head marks_list; /* all inode marks for this group */ 163 struct list_head marks_list; /* all inode marks for this group */
150 164
151 struct fasync_struct *fsn_fa; /* async notification */ 165 struct fasync_struct *fsn_fa; /* async notification */
166
167 struct fsnotify_event overflow_event; /* Event we queue when the
168 * notification list is too
169 * full */
152 170
153 /* groups can define private fields here or use the void *private */ 171 /* groups can define private fields here or use the void *private */
154 union { 172 union {
@@ -177,76 +195,10 @@ struct fsnotify_group {
177 }; 195 };
178}; 196};
179 197
180/*
181 * A single event can be queued in multiple group->notification_lists.
182 *
183 * each group->notification_list will point to an event_holder which in turns points
184 * to the actual event that needs to be sent to userspace.
185 *
186 * Seemed cheaper to create a refcnt'd event and a small holder for every group
187 * than create a different event for every group
188 *
189 */
190struct fsnotify_event_holder {
191 struct fsnotify_event *event;
192 struct list_head event_list;
193};
194
195/*
196 * Inotify needs to tack data onto an event. This struct lets us later find the
197 * correct private data of the correct group.
198 */
199struct fsnotify_event_private_data {
200 struct fsnotify_group *group;
201 struct list_head event_list;
202};
203
204/*
205 * all of the information about the original object we want to now send to
206 * a group. If you want to carry more info from the accessing task to the
207 * listener this structure is where you need to be adding fields.
208 */
209struct fsnotify_event {
210 /*
211 * If we create an event we are also likely going to need a holder
212 * to link to a group. So embed one holder in the event. Means only
213 * one allocation for the common case where we only have one group
214 */
215 struct fsnotify_event_holder holder;
216 spinlock_t lock; /* protection for the associated event_holder and private_list */
217 /* to_tell may ONLY be dereferenced during handle_event(). */
218 struct inode *to_tell; /* either the inode the event happened to or its parent */
219 /*
220 * depending on the event type we should have either a path or inode
221 * We hold a reference on path, but NOT on inode. Since we have the ref on
222 * the path, it may be dereferenced at any point during this object's
223 * lifetime. That reference is dropped when this object's refcnt hits
224 * 0. If this event contains an inode instead of a path, the inode may
225 * ONLY be used during handle_event().
226 */
227 union {
228 struct path path;
229 struct inode *inode;
230 };
231/* when calling fsnotify tell it if the data is a path or inode */ 198/* when calling fsnotify tell it if the data is a path or inode */
232#define FSNOTIFY_EVENT_NONE 0 199#define FSNOTIFY_EVENT_NONE 0
233#define FSNOTIFY_EVENT_PATH 1 200#define FSNOTIFY_EVENT_PATH 1
234#define FSNOTIFY_EVENT_INODE 2 201#define FSNOTIFY_EVENT_INODE 2
235 int data_type; /* which of the above union we have */
236 atomic_t refcnt; /* how many groups still are using/need to send this event */
237 __u32 mask; /* the type of access, bitwise OR for FS_* event types */
238
239 u32 sync_cookie; /* used to correlate events, namely inotify mv events */
240 const unsigned char *file_name;
241 size_t name_len;
242 struct pid *tgid;
243
244#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
245 __u32 response; /* userspace answer to question */
246#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */
247
248 struct list_head private_data_list; /* groups can store private data here */
249};
250 202
251/* 203/*
252 * Inode specific fields in an fsnotify_mark 204 * Inode specific fields in an fsnotify_mark
@@ -370,17 +322,12 @@ extern void fsnotify_put_group(struct fsnotify_group *group);
370extern void fsnotify_destroy_group(struct fsnotify_group *group); 322extern void fsnotify_destroy_group(struct fsnotify_group *group);
371/* fasync handler function */ 323/* fasync handler function */
372extern int fsnotify_fasync(int fd, struct file *file, int on); 324extern int fsnotify_fasync(int fd, struct file *file, int on);
373/* take a reference to an event */ 325/* Free event from memory */
374extern void fsnotify_get_event(struct fsnotify_event *event); 326extern void fsnotify_destroy_event(struct fsnotify_group *group,
375extern void fsnotify_put_event(struct fsnotify_event *event); 327 struct fsnotify_event *event);
376/* find private data previously attached to an event and unlink it */
377extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
378 struct fsnotify_event *event);
379
380/* attach the event to the group notification queue */ 328/* attach the event to the group notification queue */
381extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, 329extern struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
382 struct fsnotify_event *event, 330 struct fsnotify_event *event,
383 struct fsnotify_event_private_data *priv,
384 struct fsnotify_event *(*merge)(struct list_head *, 331 struct fsnotify_event *(*merge)(struct list_head *,
385 struct fsnotify_event *)); 332 struct fsnotify_event *));
386/* true if the group notification queue is empty */ 333/* true if the group notification queue is empty */
@@ -430,15 +377,8 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark);
430extern void fsnotify_unmount_inodes(struct list_head *list); 377extern void fsnotify_unmount_inodes(struct list_head *list);
431 378
432/* put here because inotify does some weird stuff when destroying watches */ 379/* put here because inotify does some weird stuff when destroying watches */
433extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, 380extern void fsnotify_init_event(struct fsnotify_event *event,
434 void *data, int data_is, 381 struct inode *to_tell, u32 mask);
435 const unsigned char *name,
436 u32 cookie, gfp_t gfp);
437
438/* fanotify likes to change events after they are on lists... */
439extern struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event);
440extern int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
441 struct fsnotify_event *new_event);
442 382
443#else 383#else
444 384