aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2014-01-21 18:48:14 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-01-21 19:19:41 -0500
commit7053aee26a3548ebaba046ae2e52396ccf56ac6c (patch)
tree1d21fa9409fede7b908ac08df2984766120448db /fs
parente9fe69045bd648d75d8d8099b8658a4ee005a8e5 (diff)
fsnotify: do not share events between notification groups
Currently the fsnotify framework creates one event structure for each notification event and links this event into all interested notification groups. This is done so that we save memory when several notification groups are interested in the event. However, the need for an event structure shared between inotify & fanotify bloats the event structure, so the result is often higher memory consumption. Another problem is that the fsnotify framework keeps path references with outstanding events so that fanotify can return open file descriptors with its events. This has the undesirable effect that a filesystem cannot be unmounted while there are outstanding events - a regression for inotify compared to the situation before it was converted to the fsnotify framework. For fanotify this problem is hard to avoid, and users of fanotify should kind of expect this behavior when they ask for file descriptors from notified files. This patch changes fsnotify and its users to create a separate event structure for each group. This allows for much simpler code (~400 lines removed by this patch) and also smaller event structures. For example, on a 64-bit system the original struct fsnotify_event consumes 120 bytes, plus additional space for the file name, an additional 24 bytes for the second and each subsequent group linking the event, and an additional 32 bytes for each inotify group for private data. After the conversion an inotify event consumes 48 bytes plus space for the file name, which is considerably less memory unless file names are long and there are several groups interested in the events (both of which are uncommon). A fanotify event fits in 56 bytes after the conversion (fanotify doesn't care about file names, so its events don't need space allocated for them). This is a win unless there are four or more fanotify groups interested in the event. The conversion also solves the problem with unmount when only inotify is used, as we don't have to grab path references for inotify events.
[hughd@google.com: fanotify: fix corruption preventing startup] Signed-off-by: Jan Kara <jack@suse.cz> Reviewed-by: Christoph Hellwig <hch@lst.de> Cc: Eric Paris <eparis@parisplace.org> Cc: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/notify/dnotify/dnotify.c11
-rw-r--r--fs/notify/fanotify/fanotify.c211
-rw-r--r--fs/notify/fanotify/fanotify.h23
-rw-r--r--fs/notify/fanotify/fanotify_user.c41
-rw-r--r--fs/notify/fsnotify.c37
-rw-r--r--fs/notify/group.c1
-rw-r--r--fs/notify/inotify/inotify.h21
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c125
-rw-r--r--fs/notify/inotify/inotify_user.c86
-rw-r--r--fs/notify/notification.c334
10 files changed, 279 insertions, 611 deletions
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 1fedd5f7ccc4..bfca53dbbf34 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -82,21 +82,20 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
82 * events. 82 * events.
83 */ 83 */
84static int dnotify_handle_event(struct fsnotify_group *group, 84static int dnotify_handle_event(struct fsnotify_group *group,
85 struct inode *inode,
85 struct fsnotify_mark *inode_mark, 86 struct fsnotify_mark *inode_mark,
86 struct fsnotify_mark *vfsmount_mark, 87 struct fsnotify_mark *vfsmount_mark,
87 struct fsnotify_event *event) 88 u32 mask, void *data, int data_type,
89 const unsigned char *file_name)
88{ 90{
89 struct dnotify_mark *dn_mark; 91 struct dnotify_mark *dn_mark;
90 struct inode *to_tell;
91 struct dnotify_struct *dn; 92 struct dnotify_struct *dn;
92 struct dnotify_struct **prev; 93 struct dnotify_struct **prev;
93 struct fown_struct *fown; 94 struct fown_struct *fown;
94 __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD; 95 __u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
95 96
96 BUG_ON(vfsmount_mark); 97 BUG_ON(vfsmount_mark);
97 98
98 to_tell = event->to_tell;
99
100 dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark); 99 dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
101 100
102 spin_lock(&inode_mark->lock); 101 spin_lock(&inode_mark->lock);
@@ -155,7 +154,7 @@ static struct fsnotify_ops dnotify_fsnotify_ops = {
155 .should_send_event = dnotify_should_send_event, 154 .should_send_event = dnotify_should_send_event,
156 .free_group_priv = NULL, 155 .free_group_priv = NULL,
157 .freeing_mark = NULL, 156 .freeing_mark = NULL,
158 .free_event_priv = NULL, 157 .free_event = NULL,
159}; 158};
160 159
161/* 160/*
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 0c2f9122b262..c26268d7bd9d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -9,31 +9,27 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/wait.h> 10#include <linux/wait.h>
11 11
12static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new) 12#include "fanotify.h"
13
14static bool should_merge(struct fsnotify_event *old_fsn,
15 struct fsnotify_event *new_fsn)
13{ 16{
14 pr_debug("%s: old=%p new=%p\n", __func__, old, new); 17 struct fanotify_event_info *old, *new;
15 18
16 if (old->to_tell == new->to_tell &&
17 old->data_type == new->data_type &&
18 old->tgid == new->tgid) {
19 switch (old->data_type) {
20 case (FSNOTIFY_EVENT_PATH):
21#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 19#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
22 /* dont merge two permission events */ 20 /* dont merge two permission events */
23 if ((old->mask & FAN_ALL_PERM_EVENTS) && 21 if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
24 (new->mask & FAN_ALL_PERM_EVENTS)) 22 (new_fsn->mask & FAN_ALL_PERM_EVENTS))
25 return false; 23 return false;
26#endif 24#endif
27 if ((old->path.mnt == new->path.mnt) && 25 pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
28 (old->path.dentry == new->path.dentry)) 26 old = FANOTIFY_E(old_fsn);
29 return true; 27 new = FANOTIFY_E(new_fsn);
30 break; 28
31 case (FSNOTIFY_EVENT_NONE): 29 if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
32 return true; 30 old->path.mnt == new->path.mnt &&
33 default: 31 old->path.dentry == new->path.dentry)
34 BUG(); 32 return true;
35 };
36 }
37 return false; 33 return false;
38} 34}
39 35
@@ -41,59 +37,28 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
41static struct fsnotify_event *fanotify_merge(struct list_head *list, 37static struct fsnotify_event *fanotify_merge(struct list_head *list,
42 struct fsnotify_event *event) 38 struct fsnotify_event *event)
43{ 39{
44 struct fsnotify_event_holder *test_holder; 40 struct fsnotify_event *test_event;
45 struct fsnotify_event *test_event = NULL; 41 bool do_merge = false;
46 struct fsnotify_event *new_event;
47 42
48 pr_debug("%s: list=%p event=%p\n", __func__, list, event); 43 pr_debug("%s: list=%p event=%p\n", __func__, list, event);
49 44
50 45 list_for_each_entry_reverse(test_event, list, list) {
51 list_for_each_entry_reverse(test_holder, list, event_list) { 46 if (should_merge(test_event, event)) {
52 if (should_merge(test_holder->event, event)) { 47 do_merge = true;
53 test_event = test_holder->event;
54 break; 48 break;
55 } 49 }
56 } 50 }
57 51
58 if (!test_event) 52 if (!do_merge)
59 return NULL; 53 return NULL;
60 54
61 fsnotify_get_event(test_event); 55 test_event->mask |= event->mask;
62 56 return test_event;
63 /* if they are exactly the same we are done */
64 if (test_event->mask == event->mask)
65 return test_event;
66
67 /*
68 * if the refcnt == 2 this is the only queue
69 * for this event and so we can update the mask
70 * in place.
71 */
72 if (atomic_read(&test_event->refcnt) == 2) {
73 test_event->mask |= event->mask;
74 return test_event;
75 }
76
77 new_event = fsnotify_clone_event(test_event);
78
79 /* done with test_event */
80 fsnotify_put_event(test_event);
81
82 /* couldn't allocate memory, merge was not possible */
83 if (unlikely(!new_event))
84 return ERR_PTR(-ENOMEM);
85
86 /* build new event and replace it on the list */
87 new_event->mask = (test_event->mask | event->mask);
88 fsnotify_replace_event(test_holder, new_event);
89
90 /* we hold a reference on new_event from clone_event */
91 return new_event;
92} 57}
93 58
94#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 59#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
95static int fanotify_get_response_from_access(struct fsnotify_group *group, 60static int fanotify_get_response_from_access(struct fsnotify_group *group,
96 struct fsnotify_event *event) 61 struct fanotify_event_info *event)
97{ 62{
98 int ret; 63 int ret;
99 64
@@ -106,7 +71,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
106 return 0; 71 return 0;
107 72
108 /* userspace responded, convert to something usable */ 73 /* userspace responded, convert to something usable */
109 spin_lock(&event->lock);
110 switch (event->response) { 74 switch (event->response) {
111 case FAN_ALLOW: 75 case FAN_ALLOW:
112 ret = 0; 76 ret = 0;
@@ -116,7 +80,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
116 ret = -EPERM; 80 ret = -EPERM;
117 } 81 }
118 event->response = 0; 82 event->response = 0;
119 spin_unlock(&event->lock);
120 83
121 pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, 84 pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
122 group, event, ret); 85 group, event, ret);
@@ -125,48 +88,8 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
125} 88}
126#endif 89#endif
127 90
128static int fanotify_handle_event(struct fsnotify_group *group,
129 struct fsnotify_mark *inode_mark,
130 struct fsnotify_mark *fanotify_mark,
131 struct fsnotify_event *event)
132{
133 int ret = 0;
134 struct fsnotify_event *notify_event = NULL;
135
136 BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
137 BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
138 BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
139 BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
140 BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
141 BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
142 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
143 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
144 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
145 BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
146
147 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
148
149 notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
150 if (IS_ERR(notify_event))
151 return PTR_ERR(notify_event);
152
153#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
154 if (event->mask & FAN_ALL_PERM_EVENTS) {
155 /* if we merged we need to wait on the new event */
156 if (notify_event)
157 event = notify_event;
158 ret = fanotify_get_response_from_access(group, event);
159 }
160#endif
161
162 if (notify_event)
163 fsnotify_put_event(notify_event);
164
165 return ret;
166}
167
168static bool fanotify_should_send_event(struct fsnotify_group *group, 91static bool fanotify_should_send_event(struct fsnotify_group *group,
169 struct inode *to_tell, 92 struct inode *inode,
170 struct fsnotify_mark *inode_mark, 93 struct fsnotify_mark *inode_mark,
171 struct fsnotify_mark *vfsmnt_mark, 94 struct fsnotify_mark *vfsmnt_mark,
172 __u32 event_mask, void *data, int data_type) 95 __u32 event_mask, void *data, int data_type)
@@ -174,8 +97,8 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
174 __u32 marks_mask, marks_ignored_mask; 97 __u32 marks_mask, marks_ignored_mask;
175 struct path *path = data; 98 struct path *path = data;
176 99
177 pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p " 100 pr_debug("%s: group=%p inode=%p inode_mark=%p vfsmnt_mark=%p "
178 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell, 101 "mask=%x data=%p data_type=%d\n", __func__, group, inode,
179 inode_mark, vfsmnt_mark, event_mask, data, data_type); 102 inode_mark, vfsmnt_mark, event_mask, data, data_type);
180 103
181 /* if we don't have enough info to send an event to userspace say no */ 104 /* if we don't have enough info to send an event to userspace say no */
@@ -217,6 +140,70 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
217 return false; 140 return false;
218} 141}
219 142
143static int fanotify_handle_event(struct fsnotify_group *group,
144 struct inode *inode,
145 struct fsnotify_mark *inode_mark,
146 struct fsnotify_mark *fanotify_mark,
147 u32 mask, void *data, int data_type,
148 const unsigned char *file_name)
149{
150 int ret = 0;
151 struct fanotify_event_info *event;
152 struct fsnotify_event *fsn_event;
153 struct fsnotify_event *notify_fsn_event;
154
155 BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
156 BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
157 BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
158 BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
159 BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
160 BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
161 BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
162 BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
163 BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
164 BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
165
166 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
167 mask);
168
169 event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
170 if (unlikely(!event))
171 return -ENOMEM;
172
173 fsn_event = &event->fse;
174 fsnotify_init_event(fsn_event, inode, mask);
175 event->tgid = get_pid(task_tgid(current));
176 if (data_type == FSNOTIFY_EVENT_PATH) {
177 struct path *path = data;
178 event->path = *path;
179 path_get(&event->path);
180 } else {
181 event->path.mnt = NULL;
182 event->path.dentry = NULL;
183 }
184#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
185 event->response = 0;
186#endif
187
188 notify_fsn_event = fsnotify_add_notify_event(group, fsn_event,
189 fanotify_merge);
190 if (notify_fsn_event) {
191 /* Our event wasn't used in the end. Free it. */
192 fsnotify_destroy_event(group, fsn_event);
193 if (IS_ERR(notify_fsn_event))
194 return PTR_ERR(notify_fsn_event);
195 /* We need to ask about a different events after a merge... */
196 event = FANOTIFY_E(notify_fsn_event);
197 fsn_event = notify_fsn_event;
198 }
199
200#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
201 if (fsn_event->mask & FAN_ALL_PERM_EVENTS)
202 ret = fanotify_get_response_from_access(group, event);
203#endif
204 return ret;
205}
206
220static void fanotify_free_group_priv(struct fsnotify_group *group) 207static void fanotify_free_group_priv(struct fsnotify_group *group)
221{ 208{
222 struct user_struct *user; 209 struct user_struct *user;
@@ -226,10 +213,20 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
226 free_uid(user); 213 free_uid(user);
227} 214}
228 215
216static void fanotify_free_event(struct fsnotify_event *fsn_event)
217{
218 struct fanotify_event_info *event;
219
220 event = FANOTIFY_E(fsn_event);
221 path_put(&event->path);
222 put_pid(event->tgid);
223 kmem_cache_free(fanotify_event_cachep, event);
224}
225
229const struct fsnotify_ops fanotify_fsnotify_ops = { 226const struct fsnotify_ops fanotify_fsnotify_ops = {
230 .handle_event = fanotify_handle_event, 227 .handle_event = fanotify_handle_event,
231 .should_send_event = fanotify_should_send_event, 228 .should_send_event = fanotify_should_send_event,
232 .free_group_priv = fanotify_free_group_priv, 229 .free_group_priv = fanotify_free_group_priv,
233 .free_event_priv = NULL, 230 .free_event = fanotify_free_event,
234 .freeing_mark = NULL, 231 .freeing_mark = NULL,
235}; 232};
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
new file mode 100644
index 000000000000..0e90174a116a
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.h
@@ -0,0 +1,23 @@
1#include <linux/fsnotify_backend.h>
2#include <linux/path.h>
3#include <linux/slab.h>
4
5extern struct kmem_cache *fanotify_event_cachep;
6
7struct fanotify_event_info {
8 struct fsnotify_event fse;
9 /*
10 * We hold ref to this path so it may be dereferenced at any point
11 * during this object's lifetime
12 */
13 struct path path;
14 struct pid *tgid;
15#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
16 u32 response; /* userspace answer to question */
17#endif
18};
19
20static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
21{
22 return container_of(fse, struct fanotify_event_info, fse);
23}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index e44cb6427df3..57d7c083cb4b 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -19,6 +19,7 @@
19 19
20#include "../../mount.h" 20#include "../../mount.h"
21#include "../fdinfo.h" 21#include "../fdinfo.h"
22#include "fanotify.h"
22 23
23#define FANOTIFY_DEFAULT_MAX_EVENTS 16384 24#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
24#define FANOTIFY_DEFAULT_MAX_MARKS 8192 25#define FANOTIFY_DEFAULT_MAX_MARKS 8192
@@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops;
28 29
29static struct kmem_cache *fanotify_mark_cache __read_mostly; 30static struct kmem_cache *fanotify_mark_cache __read_mostly;
30static struct kmem_cache *fanotify_response_event_cache __read_mostly; 31static struct kmem_cache *fanotify_response_event_cache __read_mostly;
32struct kmem_cache *fanotify_event_cachep __read_mostly;
31 33
32struct fanotify_response_event { 34struct fanotify_response_event {
33 struct list_head list; 35 struct list_head list;
34 __s32 fd; 36 __s32 fd;
35 struct fsnotify_event *event; 37 struct fanotify_event_info *event;
36}; 38};
37 39
38/* 40/*
@@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
61} 63}
62 64
63static int create_fd(struct fsnotify_group *group, 65static int create_fd(struct fsnotify_group *group,
64 struct fsnotify_event *event, 66 struct fanotify_event_info *event,
65 struct file **file) 67 struct file **file)
66{ 68{
67 int client_fd; 69 int client_fd;
68 struct file *new_file; 70 struct file *new_file;
@@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group,
73 if (client_fd < 0) 75 if (client_fd < 0)
74 return client_fd; 76 return client_fd;
75 77
76 if (event->data_type != FSNOTIFY_EVENT_PATH) {
77 WARN_ON(1);
78 put_unused_fd(client_fd);
79 return -EINVAL;
80 }
81
82 /* 78 /*
83 * we need a new file handle for the userspace program so it can read even if it was 79 * we need a new file handle for the userspace program so it can read even if it was
84 * originally opened O_WRONLY. 80 * originally opened O_WRONLY.
@@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group,
109} 105}
110 106
111static int fill_event_metadata(struct fsnotify_group *group, 107static int fill_event_metadata(struct fsnotify_group *group,
112 struct fanotify_event_metadata *metadata, 108 struct fanotify_event_metadata *metadata,
113 struct fsnotify_event *event, 109 struct fsnotify_event *fsn_event,
114 struct file **file) 110 struct file **file)
115{ 111{
116 int ret = 0; 112 int ret = 0;
113 struct fanotify_event_info *event;
117 114
118 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, 115 pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
119 group, metadata, event); 116 group, metadata, fsn_event);
120 117
121 *file = NULL; 118 *file = NULL;
119 event = container_of(fsn_event, struct fanotify_event_info, fse);
122 metadata->event_len = FAN_EVENT_METADATA_LEN; 120 metadata->event_len = FAN_EVENT_METADATA_LEN;
123 metadata->metadata_len = FAN_EVENT_METADATA_LEN; 121 metadata->metadata_len = FAN_EVENT_METADATA_LEN;
124 metadata->vers = FANOTIFY_METADATA_VERSION; 122 metadata->vers = FANOTIFY_METADATA_VERSION;
125 metadata->reserved = 0; 123 metadata->reserved = 0;
126 metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; 124 metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
127 metadata->pid = pid_vnr(event->tgid); 125 metadata->pid = pid_vnr(event->tgid);
128 if (unlikely(event->mask & FAN_Q_OVERFLOW)) 126 if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
129 metadata->fd = FAN_NOFD; 127 metadata->fd = FAN_NOFD;
130 else { 128 else {
131 metadata->fd = create_fd(group, event, file); 129 metadata->fd = create_fd(group, event, file);
@@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
209 if (!re) 207 if (!re)
210 return -ENOMEM; 208 return -ENOMEM;
211 209
212 re->event = event; 210 re->event = FANOTIFY_E(event);
213 re->fd = fd; 211 re->fd = fd;
214 212
215 mutex_lock(&group->fanotify_data.access_mutex); 213 mutex_lock(&group->fanotify_data.access_mutex);
@@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
217 if (atomic_read(&group->fanotify_data.bypass_perm)) { 215 if (atomic_read(&group->fanotify_data.bypass_perm)) {
218 mutex_unlock(&group->fanotify_data.access_mutex); 216 mutex_unlock(&group->fanotify_data.access_mutex);
219 kmem_cache_free(fanotify_response_event_cache, re); 217 kmem_cache_free(fanotify_response_event_cache, re);
220 event->response = FAN_ALLOW; 218 FANOTIFY_E(event)->response = FAN_ALLOW;
221 return 0; 219 return 0;
222 } 220 }
223 221
@@ -273,7 +271,7 @@ out_close_fd:
273out: 271out:
274#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS 272#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
275 if (event->mask & FAN_ALL_PERM_EVENTS) { 273 if (event->mask & FAN_ALL_PERM_EVENTS) {
276 event->response = FAN_DENY; 274 FANOTIFY_E(event)->response = FAN_DENY;
277 wake_up(&group->fanotify_data.access_waitq); 275 wake_up(&group->fanotify_data.access_waitq);
278 } 276 }
279#endif 277#endif
@@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
321 if (IS_ERR(kevent)) 319 if (IS_ERR(kevent))
322 break; 320 break;
323 ret = copy_event_to_user(group, kevent, buf); 321 ret = copy_event_to_user(group, kevent, buf);
324 fsnotify_put_event(kevent); 322 fsnotify_destroy_event(group, kevent);
325 if (ret < 0) 323 if (ret < 0)
326 break; 324 break;
327 buf += ret; 325 buf += ret;
@@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
409static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 407static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
410{ 408{
411 struct fsnotify_group *group; 409 struct fsnotify_group *group;
412 struct fsnotify_event_holder *holder; 410 struct fsnotify_event *fsn_event;
413 void __user *p; 411 void __user *p;
414 int ret = -ENOTTY; 412 int ret = -ENOTTY;
415 size_t send_len = 0; 413 size_t send_len = 0;
@@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
421 switch (cmd) { 419 switch (cmd) {
422 case FIONREAD: 420 case FIONREAD:
423 mutex_lock(&group->notification_mutex); 421 mutex_lock(&group->notification_mutex);
424 list_for_each_entry(holder, &group->notification_list, event_list) 422 list_for_each_entry(fsn_event, &group->notification_list, list)
425 send_len += FAN_EVENT_METADATA_LEN; 423 send_len += FAN_EVENT_METADATA_LEN;
426 mutex_unlock(&group->notification_mutex); 424 mutex_unlock(&group->notification_mutex);
427 ret = put_user(send_len, (int __user *) p); 425 ret = put_user(send_len, (int __user *) p);
@@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void)
906 fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); 904 fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
907 fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, 905 fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
908 SLAB_PANIC); 906 SLAB_PANIC);
907 fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
909 908
910 return 0; 909 return 0;
911} 910}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4bb21d67d9b1..7c754c91c3f6 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
128 struct fsnotify_mark *vfsmount_mark, 128 struct fsnotify_mark *vfsmount_mark,
129 __u32 mask, void *data, 129 __u32 mask, void *data,
130 int data_is, u32 cookie, 130 int data_is, u32 cookie,
131 const unsigned char *file_name, 131 const unsigned char *file_name)
132 struct fsnotify_event **event)
133{ 132{
134 struct fsnotify_group *group = NULL; 133 struct fsnotify_group *group = NULL;
135 __u32 inode_test_mask = 0; 134 __u32 inode_test_mask = 0;
@@ -170,10 +169,10 @@ static int send_to_group(struct inode *to_tell,
170 169
171 pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" 170 pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
172 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" 171 " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
173 " data=%p data_is=%d cookie=%d event=%p\n", 172 " data=%p data_is=%d cookie=%d\n",
174 __func__, group, to_tell, mask, inode_mark, 173 __func__, group, to_tell, mask, inode_mark,
175 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data, 174 inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
176 data_is, cookie, *event); 175 data_is, cookie);
177 176
178 if (!inode_test_mask && !vfsmount_test_mask) 177 if (!inode_test_mask && !vfsmount_test_mask)
179 return 0; 178 return 0;
@@ -183,14 +182,9 @@ static int send_to_group(struct inode *to_tell,
183 data_is) == false) 182 data_is) == false)
184 return 0; 183 return 0;
185 184
186 if (!*event) { 185 return group->ops->handle_event(group, to_tell, inode_mark,
187 *event = fsnotify_create_event(to_tell, mask, data, 186 vfsmount_mark, mask, data, data_is,
188 data_is, file_name, 187 file_name);
189 cookie, GFP_KERNEL);
190 if (!*event)
191 return -ENOMEM;
192 }
193 return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
194} 188}
195 189
196/* 190/*
@@ -205,7 +199,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
205 struct hlist_node *inode_node = NULL, *vfsmount_node = NULL; 199 struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
206 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL; 200 struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
207 struct fsnotify_group *inode_group, *vfsmount_group; 201 struct fsnotify_group *inode_group, *vfsmount_group;
208 struct fsnotify_event *event = NULL;
209 struct mount *mnt; 202 struct mount *mnt;
210 int idx, ret = 0; 203 int idx, ret = 0;
211 /* global tests shouldn't care about events on child only the specific event */ 204 /* global tests shouldn't care about events on child only the specific event */
@@ -258,18 +251,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
258 251
259 if (inode_group > vfsmount_group) { 252 if (inode_group > vfsmount_group) {
260 /* handle inode */ 253 /* handle inode */
261 ret = send_to_group(to_tell, inode_mark, NULL, mask, data, 254 ret = send_to_group(to_tell, inode_mark, NULL, mask,
262 data_is, cookie, file_name, &event); 255 data, data_is, cookie, file_name);
263 /* we didn't use the vfsmount_mark */ 256 /* we didn't use the vfsmount_mark */
264 vfsmount_group = NULL; 257 vfsmount_group = NULL;
265 } else if (vfsmount_group > inode_group) { 258 } else if (vfsmount_group > inode_group) {
266 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, 259 ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
267 data_is, cookie, file_name, &event); 260 data, data_is, cookie, file_name);
268 inode_group = NULL; 261 inode_group = NULL;
269 } else { 262 } else {
270 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, 263 ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
271 mask, data, data_is, cookie, file_name, 264 mask, data, data_is, cookie,
272 &event); 265 file_name);
273 } 266 }
274 267
275 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS)) 268 if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
@@ -285,12 +278,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
285 ret = 0; 278 ret = 0;
286out: 279out:
287 srcu_read_unlock(&fsnotify_mark_srcu, idx); 280 srcu_read_unlock(&fsnotify_mark_srcu, idx);
288 /*
289 * fsnotify_create_event() took a reference so the event can't be cleaned
290 * up while we are still trying to add it to lists, drop that one.
291 */
292 if (event)
293 fsnotify_put_event(event);
294 281
295 return ret; 282 return ret;
296} 283}
diff --git a/fs/notify/group.c b/fs/notify/group.c
index bd2625bd88b4..ee674fe2cec7 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
99 INIT_LIST_HEAD(&group->marks_list); 99 INIT_LIST_HEAD(&group->marks_list);
100 100
101 group->ops = ops; 101 group->ops = ops;
102 fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
102 103
103 return group; 104 return group;
104} 105}
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index b6642e4de4bf..485eef3f4407 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -2,11 +2,12 @@
2#include <linux/inotify.h> 2#include <linux/inotify.h>
3#include <linux/slab.h> /* struct kmem_cache */ 3#include <linux/slab.h> /* struct kmem_cache */
4 4
5extern struct kmem_cache *event_priv_cachep; 5struct inotify_event_info {
6 6 struct fsnotify_event fse;
7struct inotify_event_private_data {
8 struct fsnotify_event_private_data fsnotify_event_priv_data;
9 int wd; 7 int wd;
8 u32 sync_cookie;
9 int name_len;
10 char name[];
10}; 11};
11 12
12struct inotify_inode_mark { 13struct inotify_inode_mark {
@@ -14,8 +15,18 @@ struct inotify_inode_mark {
14 int wd; 15 int wd;
15}; 16};
16 17
18static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
19{
20 return container_of(fse, struct inotify_event_info, fse);
21}
22
17extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, 23extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
18 struct fsnotify_group *group); 24 struct fsnotify_group *group);
19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 25extern int inotify_handle_event(struct fsnotify_group *group,
26 struct inode *inode,
27 struct fsnotify_mark *inode_mark,
28 struct fsnotify_mark *vfsmount_mark,
29 u32 mask, void *data, int data_type,
30 const unsigned char *file_name);
20 31
21extern const struct fsnotify_ops inotify_fsnotify_ops; 32extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 4216308b81b4..6fabbd163d16 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -34,100 +34,80 @@
34#include "inotify.h" 34#include "inotify.h"
35 35
36/* 36/*
37 * Check if 2 events contain the same information. We do not compare private data 37 * Check if 2 events contain the same information.
38 * but at this moment that isn't a problem for any know fsnotify listeners.
39 */ 38 */
40static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new) 39static bool event_compare(struct fsnotify_event *old_fsn,
40 struct fsnotify_event *new_fsn)
41{ 41{
42 if ((old->mask == new->mask) && 42 struct inotify_event_info *old, *new;
43 (old->to_tell == new->to_tell) && 43
44 (old->data_type == new->data_type) && 44 if (old_fsn->mask & FS_IN_IGNORED)
45 (old->name_len == new->name_len)) { 45 return false;
46 switch (old->data_type) { 46 old = INOTIFY_E(old_fsn);
47 case (FSNOTIFY_EVENT_INODE): 47 new = INOTIFY_E(new_fsn);
48 /* remember, after old was put on the wait_q we aren't 48 if ((old_fsn->mask == new_fsn->mask) &&
49 * allowed to look at the inode any more, only thing 49 (old_fsn->inode == new_fsn->inode) &&
50 * left to check was if the file_name is the same */ 50 (old->name_len == new->name_len) &&
51 if (!old->name_len || 51 (!old->name_len || !strcmp(old->name, new->name)))
52 !strcmp(old->file_name, new->file_name)) 52 return true;
53 return true;
54 break;
55 case (FSNOTIFY_EVENT_PATH):
56 if ((old->path.mnt == new->path.mnt) &&
57 (old->path.dentry == new->path.dentry))
58 return true;
59 break;
60 case (FSNOTIFY_EVENT_NONE):
61 if (old->mask & FS_Q_OVERFLOW)
62 return true;
63 else if (old->mask & FS_IN_IGNORED)
64 return false;
65 return true;
66 };
67 }
68 return false; 53 return false;
69} 54}
70 55
71static struct fsnotify_event *inotify_merge(struct list_head *list, 56static struct fsnotify_event *inotify_merge(struct list_head *list,
72 struct fsnotify_event *event) 57 struct fsnotify_event *event)
73{ 58{
74 struct fsnotify_event_holder *last_holder;
75 struct fsnotify_event *last_event; 59 struct fsnotify_event *last_event;
76 60
77 /* and the list better be locked by something too */ 61 last_event = list_entry(list->prev, struct fsnotify_event, list);
78 spin_lock(&event->lock); 62 if (!event_compare(last_event, event))
79 63 return NULL;
80 last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
81 last_event = last_holder->event;
82 if (event_compare(last_event, event))
83 fsnotify_get_event(last_event);
84 else
85 last_event = NULL;
86
87 spin_unlock(&event->lock);
88
89 return last_event; 64 return last_event;
90} 65}
91 66
92static int inotify_handle_event(struct fsnotify_group *group, 67int inotify_handle_event(struct fsnotify_group *group,
93 struct fsnotify_mark *inode_mark, 68 struct inode *inode,
94 struct fsnotify_mark *vfsmount_mark, 69 struct fsnotify_mark *inode_mark,
95 struct fsnotify_event *event) 70 struct fsnotify_mark *vfsmount_mark,
71 u32 mask, void *data, int data_type,
72 const unsigned char *file_name)
96{ 73{
97 struct inotify_inode_mark *i_mark; 74 struct inotify_inode_mark *i_mark;
98 struct inode *to_tell; 75 struct inotify_event_info *event;
99 struct inotify_event_private_data *event_priv;
100 struct fsnotify_event_private_data *fsn_event_priv;
101 struct fsnotify_event *added_event; 76 struct fsnotify_event *added_event;
102 int wd, ret = 0; 77 struct fsnotify_event *fsn_event;
78 int ret = 0;
79 int len = 0;
80 int alloc_len = sizeof(struct inotify_event_info);
103 81
104 BUG_ON(vfsmount_mark); 82 BUG_ON(vfsmount_mark);
105 83
106 pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group, 84 if (file_name) {
107 event, event->to_tell, event->mask); 85 len = strlen(file_name);
86 alloc_len += len + 1;
87 }
108 88
109 to_tell = event->to_tell; 89 pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
90 mask);
110 91
111 i_mark = container_of(inode_mark, struct inotify_inode_mark, 92 i_mark = container_of(inode_mark, struct inotify_inode_mark,
112 fsn_mark); 93 fsn_mark);
113 wd = i_mark->wd;
114 94
115 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 95 event = kmalloc(alloc_len, GFP_KERNEL);
116 if (unlikely(!event_priv)) 96 if (unlikely(!event))
117 return -ENOMEM; 97 return -ENOMEM;
118 98
119 fsn_event_priv = &event_priv->fsnotify_event_priv_data; 99 fsn_event = &event->fse;
100 fsnotify_init_event(fsn_event, inode, mask);
101 event->wd = i_mark->wd;
102 event->name_len = len;
103 if (len)
104 strcpy(event->name, file_name);
120 105
121 fsnotify_get_group(group); 106 added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
122 fsn_event_priv->group = group;
123 event_priv->wd = wd;
124
125 added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
126 if (added_event) { 107 if (added_event) {
127 inotify_free_event_priv(fsn_event_priv); 108 /* Our event wasn't used in the end. Free it. */
128 if (!IS_ERR(added_event)) 109 fsnotify_destroy_event(group, fsn_event);
129 fsnotify_put_event(added_event); 110 if (IS_ERR(added_event))
130 else
131 ret = PTR_ERR(added_event); 111 ret = PTR_ERR(added_event);
132 } 112 }
133 113
@@ -202,22 +182,15 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
202 free_uid(group->inotify_data.user); 182 free_uid(group->inotify_data.user);
203} 183}
204 184
205void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv) 185static void inotify_free_event(struct fsnotify_event *fsn_event)
206{ 186{
207 struct inotify_event_private_data *event_priv; 187 kfree(INOTIFY_E(fsn_event));
208
209
210 event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
211 fsnotify_event_priv_data);
212
213 fsnotify_put_group(fsn_event_priv->group);
214 kmem_cache_free(event_priv_cachep, event_priv);
215} 188}
216 189
217const struct fsnotify_ops inotify_fsnotify_ops = { 190const struct fsnotify_ops inotify_fsnotify_ops = {
218 .handle_event = inotify_handle_event, 191 .handle_event = inotify_handle_event,
219 .should_send_event = inotify_should_send_event, 192 .should_send_event = inotify_should_send_event,
220 .free_group_priv = inotify_free_group_priv, 193 .free_group_priv = inotify_free_group_priv,
221 .free_event_priv = inotify_free_event_priv, 194 .free_event = inotify_free_event,
222 .freeing_mark = inotify_freeing_mark, 195 .freeing_mark = inotify_freeing_mark,
223}; 196};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1bb6dc8eaf1c..497395c8274b 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly;
50static int inotify_max_user_watches __read_mostly; 50static int inotify_max_user_watches __read_mostly;
51 51
52static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; 52static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
53struct kmem_cache *event_priv_cachep __read_mostly;
54 53
55#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
56 55
@@ -124,8 +123,11 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
124 return ret; 123 return ret;
125} 124}
126 125
127static int round_event_name_len(struct fsnotify_event *event) 126static int round_event_name_len(struct fsnotify_event *fsn_event)
128{ 127{
128 struct inotify_event_info *event;
129
130 event = INOTIFY_E(fsn_event);
129 if (!event->name_len) 131 if (!event->name_len)
130 return 0; 132 return 0;
131 return roundup(event->name_len + 1, sizeof(struct inotify_event)); 133 return roundup(event->name_len + 1, sizeof(struct inotify_event));
@@ -169,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
169 * buffer we had in "get_one_event()" above. 171 * buffer we had in "get_one_event()" above.
170 */ 172 */
171static ssize_t copy_event_to_user(struct fsnotify_group *group, 173static ssize_t copy_event_to_user(struct fsnotify_group *group,
172 struct fsnotify_event *event, 174 struct fsnotify_event *fsn_event,
173 char __user *buf) 175 char __user *buf)
174{ 176{
175 struct inotify_event inotify_event; 177 struct inotify_event inotify_event;
176 struct fsnotify_event_private_data *fsn_priv; 178 struct inotify_event_info *event;
177 struct inotify_event_private_data *priv;
178 size_t event_size = sizeof(struct inotify_event); 179 size_t event_size = sizeof(struct inotify_event);
179 size_t name_len; 180 size_t name_len;
180 size_t pad_name_len; 181 size_t pad_name_len;
181 182
182 pr_debug("%s: group=%p event=%p\n", __func__, group, event); 183 pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
183
184 /* we get the inotify watch descriptor from the event private data */
185 spin_lock(&event->lock);
186 fsn_priv = fsnotify_remove_priv_from_event(group, event);
187 spin_unlock(&event->lock);
188
189 if (!fsn_priv)
190 inotify_event.wd = -1;
191 else {
192 priv = container_of(fsn_priv, struct inotify_event_private_data,
193 fsnotify_event_priv_data);
194 inotify_event.wd = priv->wd;
195 inotify_free_event_priv(fsn_priv);
196 }
197 184
185 event = INOTIFY_E(fsn_event);
198 name_len = event->name_len; 186 name_len = event->name_len;
199 /* 187 /*
200 * round up name length so it is a multiple of event_size 188 * round up name length so it is a multiple of event_size
201 * plus an extra byte for the terminating '\0'. 189 * plus an extra byte for the terminating '\0'.
202 */ 190 */
203 pad_name_len = round_event_name_len(event); 191 pad_name_len = round_event_name_len(fsn_event);
204 inotify_event.len = pad_name_len; 192 inotify_event.len = pad_name_len;
205 inotify_event.mask = inotify_mask_to_arg(event->mask); 193 inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
194 inotify_event.wd = event->wd;
206 inotify_event.cookie = event->sync_cookie; 195 inotify_event.cookie = event->sync_cookie;
207 196
208 /* send the main event */ 197 /* send the main event */
@@ -218,7 +207,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
218 */ 207 */
219 if (pad_name_len) { 208 if (pad_name_len) {
220 /* copy the path name */ 209 /* copy the path name */
221 if (copy_to_user(buf, event->file_name, name_len)) 210 if (copy_to_user(buf, event->name, name_len))
222 return -EFAULT; 211 return -EFAULT;
223 buf += name_len; 212 buf += name_len;
224 213
@@ -257,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
257 if (IS_ERR(kevent)) 246 if (IS_ERR(kevent))
258 break; 247 break;
259 ret = copy_event_to_user(group, kevent, buf); 248 ret = copy_event_to_user(group, kevent, buf);
260 fsnotify_put_event(kevent); 249 fsnotify_destroy_event(group, kevent);
261 if (ret < 0) 250 if (ret < 0)
262 break; 251 break;
263 buf += ret; 252 buf += ret;
@@ -300,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
300 unsigned long arg) 289 unsigned long arg)
301{ 290{
302 struct fsnotify_group *group; 291 struct fsnotify_group *group;
303 struct fsnotify_event_holder *holder; 292 struct fsnotify_event *fsn_event;
304 struct fsnotify_event *event;
305 void __user *p; 293 void __user *p;
306 int ret = -ENOTTY; 294 int ret = -ENOTTY;
307 size_t send_len = 0; 295 size_t send_len = 0;
@@ -314,10 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
314 switch (cmd) { 302 switch (cmd) {
315 case FIONREAD: 303 case FIONREAD:
316 mutex_lock(&group->notification_mutex); 304 mutex_lock(&group->notification_mutex);
317 list_for_each_entry(holder, &group->notification_list, event_list) { 305 list_for_each_entry(fsn_event, &group->notification_list,
318 event = holder->event; 306 list) {
319 send_len += sizeof(struct inotify_event); 307 send_len += sizeof(struct inotify_event);
320 send_len += round_event_name_len(event); 308 send_len += round_event_name_len(fsn_event);
321 } 309 }
322 mutex_unlock(&group->notification_mutex); 310 mutex_unlock(&group->notification_mutex);
323 ret = put_user(send_len, (int __user *) p); 311 ret = put_user(send_len, (int __user *) p);
@@ -504,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
504 struct fsnotify_group *group) 492 struct fsnotify_group *group)
505{ 493{
506 struct inotify_inode_mark *i_mark; 494 struct inotify_inode_mark *i_mark;
507 struct fsnotify_event *ignored_event, *notify_event;
508 struct inotify_event_private_data *event_priv;
509 struct fsnotify_event_private_data *fsn_event_priv;
510 int ret;
511 495
512 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); 496 /* Queue ignore event for the watch */
513 497 inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
514 ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, 498 NULL, FSNOTIFY_EVENT_NONE, NULL);
515 FSNOTIFY_EVENT_NONE, NULL, 0,
516 GFP_NOFS);
517 if (!ignored_event)
518 goto skip_send_ignore;
519
520 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
521 if (unlikely(!event_priv))
522 goto skip_send_ignore;
523
524 fsn_event_priv = &event_priv->fsnotify_event_priv_data;
525
526 fsnotify_get_group(group);
527 fsn_event_priv->group = group;
528 event_priv->wd = i_mark->wd;
529
530 notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
531 if (notify_event) {
532 if (IS_ERR(notify_event))
533 ret = PTR_ERR(notify_event);
534 else
535 fsnotify_put_event(notify_event);
536 inotify_free_event_priv(fsn_event_priv);
537 }
538
539skip_send_ignore:
540 /* matches the reference taken when the event was created */
541 if (ignored_event)
542 fsnotify_put_event(ignored_event);
543 499
500 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
544 /* remove this mark from the idr */ 501 /* remove this mark from the idr */
545 inotify_remove_from_idr(group, i_mark); 502 inotify_remove_from_idr(group, i_mark);
546 503
@@ -837,7 +794,6 @@ static int __init inotify_user_setup(void)
837 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); 794 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
838 795
839 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); 796 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
840 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
841 797
842 inotify_max_queued_events = 16384; 798 inotify_max_queued_events = 16384;
843 inotify_max_user_instances = 128; 799 inotify_max_user_instances = 128;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 7b51b05f160c..952237b8e2d2 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -48,15 +48,6 @@
48#include <linux/fsnotify_backend.h> 48#include <linux/fsnotify_backend.h>
49#include "fsnotify.h" 49#include "fsnotify.h"
50 50
51static struct kmem_cache *fsnotify_event_cachep;
52static struct kmem_cache *fsnotify_event_holder_cachep;
53/*
54 * This is a magic event we send when the q is too full. Since it doesn't
55 * hold real event information we just keep one system wide and use it any time
56 * it is needed. It's refcnt is set 1 at kernel init time and will never
57 * get set to 0 so it will never get 'freed'
58 */
59static struct fsnotify_event *q_overflow_event;
60static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0); 51static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
61 52
62/** 53/**
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
76 return list_empty(&group->notification_list) ? true : false; 67 return list_empty(&group->notification_list) ? true : false;
77} 68}
78 69
79void fsnotify_get_event(struct fsnotify_event *event) 70void fsnotify_destroy_event(struct fsnotify_group *group,
71 struct fsnotify_event *event)
80{ 72{
81 atomic_inc(&event->refcnt); 73 /* Overflow events are per-group and we don't want to free them */
82} 74 if (!event || event->mask == FS_Q_OVERFLOW)
83
84void fsnotify_put_event(struct fsnotify_event *event)
85{
86 if (!event)
87 return; 75 return;
88 76
89 if (atomic_dec_and_test(&event->refcnt)) { 77 group->ops->free_event(event);
90 pr_debug("%s: event=%p\n", __func__, event);
91
92 if (event->data_type == FSNOTIFY_EVENT_PATH)
93 path_put(&event->path);
94
95 BUG_ON(!list_empty(&event->private_data_list));
96
97 kfree(event->file_name);
98 put_pid(event->tgid);
99 kmem_cache_free(fsnotify_event_cachep, event);
100 }
101}
102
103struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
104{
105 return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
106}
107
108void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
109{
110 if (holder)
111 kmem_cache_free(fsnotify_event_holder_cachep, holder);
112}
113
114/*
115 * Find the private data that the group previously attached to this event when
116 * the group added the event to the notification queue (fsnotify_add_notify_event)
117 */
118struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
119{
120 struct fsnotify_event_private_data *lpriv;
121 struct fsnotify_event_private_data *priv = NULL;
122
123 assert_spin_locked(&event->lock);
124
125 list_for_each_entry(lpriv, &event->private_data_list, event_list) {
126 if (lpriv->group == group) {
127 priv = lpriv;
128 list_del(&priv->event_list);
129 break;
130 }
131 }
132 return priv;
133} 78}
134 79
135/* 80/*
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
137 * event off the queue to deal with. If the event is successfully added to the 82 * event off the queue to deal with. If the event is successfully added to the
138 * group's notification queue, a reference is taken on event. 83 * group's notification queue, a reference is taken on event.
139 */ 84 */
140struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event, 85struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
141 struct fsnotify_event_private_data *priv, 86 struct fsnotify_event *event,
142 struct fsnotify_event *(*merge)(struct list_head *, 87 struct fsnotify_event *(*merge)(struct list_head *,
143 struct fsnotify_event *)) 88 struct fsnotify_event *))
144{ 89{
145 struct fsnotify_event *return_event = NULL; 90 struct fsnotify_event *return_event = NULL;
146 struct fsnotify_event_holder *holder = NULL;
147 struct list_head *list = &group->notification_list; 91 struct list_head *list = &group->notification_list;
148 92
149 pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv); 93 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
150
151 /*
152 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
153 * Check if we expect to be able to use that holder. If not alloc a new
154 * holder.
155 * For the overflow event it's possible that something will use the in
156 * event holder before we get the lock so we may need to jump back and
157 * alloc a new holder, this can't happen for most events...
158 */
159 if (!list_empty(&event->holder.event_list)) {
160alloc_holder:
161 holder = fsnotify_alloc_event_holder();
162 if (!holder)
163 return ERR_PTR(-ENOMEM);
164 }
165 94
166 mutex_lock(&group->notification_mutex); 95 mutex_lock(&group->notification_mutex);
167 96
168 if (group->q_len >= group->max_events) { 97 if (group->q_len >= group->max_events) {
169 event = q_overflow_event; 98 /* Queue overflow event only if it isn't already queued */
170 99 if (list_empty(&group->overflow_event.list))
171 /* 100 event = &group->overflow_event;
172 * we need to return the overflow event
173 * which means we need a ref
174 */
175 fsnotify_get_event(event);
176 return_event = event; 101 return_event = event;
177
178 /* sorry, no private data on the overflow event */
179 priv = NULL;
180 } 102 }
181 103
182 if (!list_empty(list) && merge) { 104 if (!list_empty(list) && merge) {
183 struct fsnotify_event *tmp; 105 return_event = merge(list, event);
184
185 tmp = merge(list, event);
186 if (tmp) {
187 mutex_unlock(&group->notification_mutex);
188
189 if (return_event)
190 fsnotify_put_event(return_event);
191 if (holder != &event->holder)
192 fsnotify_destroy_event_holder(holder);
193 return tmp;
194 }
195 }
196
197 spin_lock(&event->lock);
198
199 if (list_empty(&event->holder.event_list)) {
200 if (unlikely(holder))
201 fsnotify_destroy_event_holder(holder);
202 holder = &event->holder;
203 } else if (unlikely(!holder)) {
204 /* between the time we checked above and got the lock the in
205 * event holder was used, go back and get a new one */
206 spin_unlock(&event->lock);
207 mutex_unlock(&group->notification_mutex);
208
209 if (return_event) { 106 if (return_event) {
210 fsnotify_put_event(return_event); 107 mutex_unlock(&group->notification_mutex);
211 return_event = NULL; 108 return return_event;
212 } 109 }
213
214 goto alloc_holder;
215 } 110 }
216 111
217 group->q_len++; 112 group->q_len++;
218 holder->event = event; 113 list_add_tail(&event->list, list);
219
220 fsnotify_get_event(event);
221 list_add_tail(&holder->event_list, list);
222 if (priv)
223 list_add_tail(&priv->event_list, &event->private_data_list);
224 spin_unlock(&event->lock);
225 mutex_unlock(&group->notification_mutex); 114 mutex_unlock(&group->notification_mutex);
226 115
227 wake_up(&group->notification_waitq); 116 wake_up(&group->notification_waitq);
@@ -230,32 +119,20 @@ alloc_holder:
230} 119}
231 120
232/* 121/*
233 * Remove and return the first event from the notification list. There is a 122 * Remove and return the first event from the notification list. It is the
234 * reference held on this event since it was on the list. It is the responsibility 123 * responsibility of the caller to destroy the obtained event
235 * of the caller to drop this reference.
236 */ 124 */
237struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group) 125struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
238{ 126{
239 struct fsnotify_event *event; 127 struct fsnotify_event *event;
240 struct fsnotify_event_holder *holder;
241 128
242 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 129 BUG_ON(!mutex_is_locked(&group->notification_mutex));
243 130
244 pr_debug("%s: group=%p\n", __func__, group); 131 pr_debug("%s: group=%p\n", __func__, group);
245 132
246 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); 133 event = list_first_entry(&group->notification_list,
247 134 struct fsnotify_event, list);
248 event = holder->event; 135 list_del(&event->list);
249
250 spin_lock(&event->lock);
251 holder->event = NULL;
252 list_del_init(&holder->event_list);
253 spin_unlock(&event->lock);
254
255 /* event == holder means we are referenced through the in event holder */
256 if (holder != &event->holder)
257 fsnotify_destroy_event_holder(holder);
258
259 group->q_len--; 136 group->q_len--;
260 137
261 return event; 138 return event;
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
266 */ 143 */
267struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group) 144struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
268{ 145{
269 struct fsnotify_event *event;
270 struct fsnotify_event_holder *holder;
271
272 BUG_ON(!mutex_is_locked(&group->notification_mutex)); 146 BUG_ON(!mutex_is_locked(&group->notification_mutex));
273 147
274 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list); 148 return list_first_entry(&group->notification_list,
275 event = holder->event; 149 struct fsnotify_event, list);
276
277 return event;
278} 150}
279 151
280/* 152/*
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
284void fsnotify_flush_notify(struct fsnotify_group *group) 156void fsnotify_flush_notify(struct fsnotify_group *group)
285{ 157{
286 struct fsnotify_event *event; 158 struct fsnotify_event *event;
287 struct fsnotify_event_private_data *priv;
288 159
289 mutex_lock(&group->notification_mutex); 160 mutex_lock(&group->notification_mutex);
290 while (!fsnotify_notify_queue_is_empty(group)) { 161 while (!fsnotify_notify_queue_is_empty(group)) {
291 event = fsnotify_remove_notify_event(group); 162 event = fsnotify_remove_notify_event(group);
292 /* if they don't implement free_event_priv they better not have attached any */ 163 fsnotify_destroy_event(group, event);
293 if (group->ops->free_event_priv) {
294 spin_lock(&event->lock);
295 priv = fsnotify_remove_priv_from_event(group, event);
296 spin_unlock(&event->lock);
297 if (priv)
298 group->ops->free_event_priv(priv);
299 }
300 fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
301 } 164 }
302 mutex_unlock(&group->notification_mutex); 165 mutex_unlock(&group->notification_mutex);
303} 166}
304 167
305static void initialize_event(struct fsnotify_event *event)
306{
307 INIT_LIST_HEAD(&event->holder.event_list);
308 atomic_set(&event->refcnt, 1);
309
310 spin_lock_init(&event->lock);
311
312 INIT_LIST_HEAD(&event->private_data_list);
313}
314
315/*
316 * Caller damn well better be holding whatever mutex is protecting the
317 * old_holder->event_list and the new_event must be a clean event which
318 * cannot be found anywhere else in the kernel.
319 */
320int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
321 struct fsnotify_event *new_event)
322{
323 struct fsnotify_event *old_event = old_holder->event;
324 struct fsnotify_event_holder *new_holder = &new_event->holder;
325
326 enum event_spinlock_class {
327 SPINLOCK_OLD,
328 SPINLOCK_NEW,
329 };
330
331 pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
332
333 /*
334 * if the new_event's embedded holder is in use someone
335 * screwed up and didn't give us a clean new event.
336 */
337 BUG_ON(!list_empty(&new_holder->event_list));
338
339 spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
340 spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
341
342 new_holder->event = new_event;
343 list_replace_init(&old_holder->event_list, &new_holder->event_list);
344
345 spin_unlock(&new_event->lock);
346 spin_unlock(&old_event->lock);
347
348 /* event == holder means we are referenced through the in event holder */
349 if (old_holder != &old_event->holder)
350 fsnotify_destroy_event_holder(old_holder);
351
352 fsnotify_get_event(new_event); /* on the list take reference */
353 fsnotify_put_event(old_event); /* off the list, drop reference */
354
355 return 0;
356}
357
358struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
359{
360 struct fsnotify_event *event;
361
362 event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
363 if (!event)
364 return NULL;
365
366 pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
367
368 memcpy(event, old_event, sizeof(*event));
369 initialize_event(event);
370
371 if (event->name_len) {
372 event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
373 if (!event->file_name) {
374 kmem_cache_free(fsnotify_event_cachep, event);
375 return NULL;
376 }
377 }
378 event->tgid = get_pid(old_event->tgid);
379 if (event->data_type == FSNOTIFY_EVENT_PATH)
380 path_get(&event->path);
381
382 return event;
383}
384
385/* 168/*
386 * fsnotify_create_event - Allocate a new event which will be sent to each 169 * fsnotify_create_event - Allocate a new event which will be sent to each
387 * group's handle_event function if the group was interested in this 170 * group's handle_event function if the group was interested in this
388 * particular event. 171 * particular event.
389 * 172 *
390 * @to_tell the inode which is supposed to receive the event (sometimes a 173 * @inode the inode which is supposed to receive the event (sometimes a
391 * parent of the inode to which the event happened. 174 * parent of the inode to which the event happened.
392 * @mask what actually happened. 175 * @mask what actually happened.
393 * @data pointer to the object which was actually affected 176 * @data pointer to the object which was actually affected
394 * @data_type flag indication if the data is a file, path, inode, nothing... 177 * @data_type flag indication if the data is a file, path, inode, nothing...
395 * @name the filename, if available 178 * @name the filename, if available
396 */ 179 */
397struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data, 180void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
398 int data_type, const unsigned char *name, 181 u32 mask)
399 u32 cookie, gfp_t gfp)
400{ 182{
401 struct fsnotify_event *event; 183 INIT_LIST_HEAD(&event->list);
402 184 event->inode = inode;
403 event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
404 if (!event)
405 return NULL;
406
407 pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
408 __func__, event, to_tell, mask, data, data_type);
409
410 initialize_event(event);
411
412 if (name) {
413 event->file_name = kstrdup(name, gfp);
414 if (!event->file_name) {
415 kmem_cache_free(fsnotify_event_cachep, event);
416 return NULL;
417 }
418 event->name_len = strlen(event->file_name);
419 }
420
421 event->tgid = get_pid(task_tgid(current));
422 event->sync_cookie = cookie;
423 event->to_tell = to_tell;
424 event->data_type = data_type;
425
426 switch (data_type) {
427 case FSNOTIFY_EVENT_PATH: {
428 struct path *path = data;
429 event->path.dentry = path->dentry;
430 event->path.mnt = path->mnt;
431 path_get(&event->path);
432 break;
433 }
434 case FSNOTIFY_EVENT_INODE:
435 event->inode = data;
436 break;
437 case FSNOTIFY_EVENT_NONE:
438 event->inode = NULL;
439 event->path.dentry = NULL;
440 event->path.mnt = NULL;
441 break;
442 default:
443 BUG();
444 }
445
446 event->mask = mask; 185 event->mask = mask;
447
448 return event;
449}
450
451static __init int fsnotify_notification_init(void)
452{
453 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
454 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
455
456 q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
457 FSNOTIFY_EVENT_NONE, NULL, 0,
458 GFP_KERNEL);
459 if (!q_overflow_event)
460 panic("unable to allocate fsnotify q_overflow_event\n");
461
462 return 0;
463} 186}
464subsys_initcall(fsnotify_notification_init);