-rw-r--r--   MAINTAINERS                          |   8
-rw-r--r--   fs/inode.c                           |  10
-rw-r--r--   fs/notify/Kconfig                    |  13
-rw-r--r--   fs/notify/Makefile                   |   2
-rw-r--r--   fs/notify/dnotify/Kconfig            |   1
-rw-r--r--   fs/notify/dnotify/dnotify.c          | 464
-rw-r--r--   fs/notify/fsnotify.c                 | 186
-rw-r--r--   fs/notify/fsnotify.h                 |  34
-rw-r--r--   fs/notify/group.c                    | 254
-rw-r--r--   fs/notify/inode_mark.c               | 426
-rw-r--r--   fs/notify/inotify/Kconfig            |  20
-rw-r--r--   fs/notify/inotify/Makefile           |   2
-rw-r--r--   fs/notify/inotify/inotify.c          |  20
-rw-r--r--   fs/notify/inotify/inotify.h          |  21
-rw-r--r--   fs/notify/inotify/inotify_fsnotify.c | 138
-rw-r--r--   fs/notify/inotify/inotify_user.c     | 837
-rw-r--r--   fs/notify/notification.c             | 411
-rw-r--r--   include/linux/dcache.h               |   4
-rw-r--r--   include/linux/dnotify.h              |  29
-rw-r--r--   include/linux/fs.h                   |   6
-rw-r--r--   include/linux/fsnotify.h             | 199
-rw-r--r--   include/linux/fsnotify_backend.h     | 387
-rw-r--r--   init/Kconfig                         |   3
23 files changed, 2813 insertions(+), 662 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 1a0084e22cf3..c944d618dc83 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1802,10 +1802,10 @@ F: drivers/char/epca*
 F:	drivers/char/digi*
 
 DIRECTORY NOTIFICATION (DNOTIFY)
-P:	Stephen Rothwell
-M:	sfr@canb.auug.org.au
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
-S:	Supported
+S:	Maintained
 F:	Documentation/filesystems/dnotify.txt
 F:	fs/notify/dnotify/
 F:	include/linux/dnotify.h
@@ -2858,6 +2858,8 @@ P: John McCutchan
 M:	john@johnmccutchan.com
 P:	Robert Love
 M:	rlove@rlove.org
+P:	Eric Paris
+M:	eparis@parisplace.org
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/filesystems/inotify.txt
diff --git a/fs/inode.c b/fs/inode.c
index bca0c618fdb3..ca337014ae29 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -22,6 +22,7 @@
 #include <linux/cdev.h>
 #include <linux/bootmem.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify.h>
 #include <linux/mount.h>
 #include <linux/async.h>
 
@@ -189,6 +190,10 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_private = NULL;
 	inode->i_mapping = mapping;
 
+#ifdef CONFIG_FSNOTIFY
+	inode->i_fsnotify_mask = 0;
+#endif
+
 	return inode;
 
 out_free_security:
@@ -221,6 +226,7 @@ void destroy_inode(struct inode *inode)
 	BUG_ON(inode_has_buffers(inode));
 	ima_inode_free(inode);
 	security_inode_free(inode);
+	fsnotify_inode_delete(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
 	else
@@ -252,6 +258,9 @@ void inode_init_once(struct inode *inode)
 	INIT_LIST_HEAD(&inode->inotify_watches);
 	mutex_init(&inode->inotify_mutex);
 #endif
+#ifdef CONFIG_FSNOTIFY
+	INIT_HLIST_HEAD(&inode->i_fsnotify_mark_entries);
+#endif
 }
 EXPORT_SYMBOL(inode_init_once);
 
@@ -398,6 +407,7 @@ int invalidate_inodes(struct super_block *sb)
 	mutex_lock(&iprune_mutex);
 	spin_lock(&inode_lock);
 	inotify_unmount_inodes(&sb->s_inodes);
+	fsnotify_unmount_inodes(&sb->s_inodes);
 	busy = invalidate_list(&sb->s_inodes, &throw_away);
 	spin_unlock(&inode_lock);
 
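Taken together, the fs/inode.c hunks give every inode a pre-zeroed event mask and an empty list of marks, and hook inode eviction and unmount into fsnotify. The per-inode state itself lands in include/linux/fs.h (see the diffstat); as a rough sketch, with the exact placement assumed:

struct inode {
	/* ... existing fields ... */
#ifdef CONFIG_FSNOTIFY
	__u32			i_fsnotify_mask;	 /* all events this inode cares about */
	struct hlist_head	i_fsnotify_mark_entries; /* marks attached to this inode */
#endif
	/* ... */
};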
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index 50914d7303c6..31dac7e3b0f1 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -1,2 +1,15 @@
+config FSNOTIFY
+	bool "Filesystem notification backend"
+	default y
+	---help---
+	  fsnotify is a backend for filesystem notification. fsnotify does
+	  not provide any userspace interface but does provide the basis
+	  needed for other notification schemes such as dnotify, inotify,
+	  and fanotify.
+
+	  Say Y here to enable fsnotify support.
+
+	  If unsure, say Y.
+
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
diff --git a/fs/notify/Makefile b/fs/notify/Makefile
index 5a95b6010ce7..0922cc826c46 100644
--- a/fs/notify/Makefile
+++ b/fs/notify/Makefile
@@ -1,2 +1,4 @@
+obj-$(CONFIG_FSNOTIFY)	+= fsnotify.o notification.o group.o inode_mark.o
+
 obj-y			+= dnotify/
 obj-y			+= inotify/
diff --git a/fs/notify/dnotify/Kconfig b/fs/notify/dnotify/Kconfig
index 26adf5dfa646..904ff8d5405a 100644
--- a/fs/notify/dnotify/Kconfig
+++ b/fs/notify/dnotify/Kconfig
@@ -1,5 +1,6 @@
 config DNOTIFY
 	bool "Dnotify support"
+	depends on FSNOTIFY
 	default y
 	help
 	  Dnotify is a directory-based per-fd file change notification system
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index b0aa2cde80bd..828a889be909 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -3,6 +3,9 @@
  *
  * Copyright (C) 2000,2001,2002 Stephen Rothwell
  *
+ * Copyright (C) 2009 Eric Paris <Red Hat Inc>
+ * dnotify was largely rewritten to use the new fsnotify infrastructure
+ *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
  * Free Software Foundation; either version 2, or (at your option) any
@@ -21,24 +24,173 @@
 #include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/fdtable.h>
+#include <linux/fsnotify_backend.h>
 
 int dir_notify_enable __read_mostly = 1;
 
-static struct kmem_cache *dn_cache __read_mostly;
+static struct kmem_cache *dnotify_struct_cache __read_mostly;
+static struct kmem_cache *dnotify_mark_entry_cache __read_mostly;
+static struct fsnotify_group *dnotify_group __read_mostly;
+static DEFINE_MUTEX(dnotify_mark_mutex);
+
+/*
+ * dnotify will attach one of these to each inode (i_fsnotify_mark_entries) which
+ * is being watched by dnotify.  If multiple userspace applications are watching
+ * the same directory with dnotify their information is chained in dn.
+ */
+struct dnotify_mark_entry {
+	struct fsnotify_mark_entry fsn_entry;
+	struct dnotify_struct *dn;
+};
 
-static void redo_inode_mask(struct inode *inode)
+/*
+ * When a process starts or stops watching an inode the set of events which
+ * dnotify cares about for that inode may change.  This function runs the
+ * list of everything receiving dnotify events about this directory and calculates
+ * the set of all those events.  After it updates what dnotify is interested in
+ * it calls the fsnotify function so it can update the set of all events relevant
+ * to this inode.
+ */
+static void dnotify_recalc_inode_mask(struct fsnotify_mark_entry *entry)
 {
-	unsigned long new_mask;
+	__u32 new_mask, old_mask;
 	struct dnotify_struct *dn;
+	struct dnotify_mark_entry *dnentry = container_of(entry,
+							  struct dnotify_mark_entry,
+							  fsn_entry);
+
+	assert_spin_locked(&entry->lock);
 
+	old_mask = entry->mask;
 	new_mask = 0;
-	for (dn = inode->i_dnotify; dn != NULL; dn = dn->dn_next)
-		new_mask |= dn->dn_mask & ~DN_MULTISHOT;
-	inode->i_dnotify_mask = new_mask;
+	for (dn = dnentry->dn; dn != NULL; dn = dn->dn_next)
+		new_mask |= (dn->dn_mask & ~FS_DN_MULTISHOT);
+	entry->mask = new_mask;
+
+	if (old_mask == new_mask)
+		return;
+
+	if (entry->inode)
+		fsnotify_recalc_inode_mask(entry->inode);
+}
+
+/*
+ * Main fsnotify call where events are delivered to dnotify.
+ * Find the dnotify mark on the relevant inode, run the list of dnotify structs
+ * on that mark and determine which of them has expressed interest in receiving
+ * events of this type.  When found send the correct process and signal and
+ * destroy the dnotify struct if it was not registered to receive multiple
+ * events.
+ */
+static int dnotify_handle_event(struct fsnotify_group *group,
+				struct fsnotify_event *event)
+{
+	struct fsnotify_mark_entry *entry = NULL;
+	struct dnotify_mark_entry *dnentry;
+	struct inode *to_tell;
+	struct dnotify_struct *dn;
+	struct dnotify_struct **prev;
+	struct fown_struct *fown;
+
+	to_tell = event->to_tell;
+
+	spin_lock(&to_tell->i_lock);
+	entry = fsnotify_find_mark_entry(group, to_tell);
+	spin_unlock(&to_tell->i_lock);
+
+	/* unlikely since we already passed dnotify_should_send_event() */
+	if (unlikely(!entry))
+		return 0;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
+	while ((dn = *prev) != NULL) {
+		if ((dn->dn_mask & event->mask) == 0) {
+			prev = &dn->dn_next;
+			continue;
+		}
+		fown = &dn->dn_filp->f_owner;
+		send_sigio(fown, dn->dn_fd, POLL_MSG);
+		if (dn->dn_mask & FS_DN_MULTISHOT)
+			prev = &dn->dn_next;
+		else {
+			*prev = dn->dn_next;
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
+		}
+	}
+
+	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);
+
+	return 0;
+}
+
+/*
+ * Given an inode and mask determine if dnotify would be interested in sending
+ * userspace notification for that pair.
+ */
+static bool dnotify_should_send_event(struct fsnotify_group *group,
+				      struct inode *inode, __u32 mask)
+{
+	struct fsnotify_mark_entry *entry;
+	bool send;
+
+	/* !dir_notify_enable should never get here, don't waste time checking
+	if (!dir_notify_enable)
+		return 0; */
+
+	/* not a dir, dnotify doesn't care */
+	if (!S_ISDIR(inode->i_mode))
+		return false;
+
+	spin_lock(&inode->i_lock);
+	entry = fsnotify_find_mark_entry(group, inode);
+	spin_unlock(&inode->i_lock);
+
+	/* no mark means no dnotify watch */
+	if (!entry)
+		return false;
+
+	mask = (mask & ~FS_EVENT_ON_CHILD);
+	send = (mask & entry->mask);
+
+	fsnotify_put_mark(entry);	/* matches fsnotify_find_mark_entry */
+
+	return send;
+}
+
+static void dnotify_free_mark(struct fsnotify_mark_entry *entry)
+{
+	struct dnotify_mark_entry *dnentry = container_of(entry,
+							  struct dnotify_mark_entry,
+							  fsn_entry);
+
+	BUG_ON(dnentry->dn);
+
+	kmem_cache_free(dnotify_mark_entry_cache, dnentry);
 }
 
+static struct fsnotify_ops dnotify_fsnotify_ops = {
+	.handle_event = dnotify_handle_event,
+	.should_send_event = dnotify_should_send_event,
+	.free_group_priv = NULL,
+	.freeing_mark = NULL,
+	.free_event_priv = NULL,
+};
+
+/*
+ * Called every time a file is closed.  Looks first for a dnotify mark on the
+ * inode.  If one is found run all of the ->dn entries attached to that
+ * mark for one relevant to this process closing the file and remove that
+ * dnotify_struct.  If that was the last dnotify_struct also remove the
+ * fsnotify_mark_entry.
+ */
 void dnotify_flush(struct file *filp, fl_owner_t id)
 {
+	struct fsnotify_mark_entry *entry;
+	struct dnotify_mark_entry *dnentry;
 	struct dnotify_struct *dn;
 	struct dnotify_struct **prev;
 	struct inode *inode;
@@ -46,145 +198,243 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
 	inode = filp->f_path.dentry->d_inode;
 	if (!S_ISDIR(inode->i_mode))
 		return;
+
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
+	spin_unlock(&inode->i_lock);
+	if (!entry)
+		return;
+	dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+
+	mutex_lock(&dnotify_mark_mutex);
+
+	spin_lock(&entry->lock);
+	prev = &dnentry->dn;
 	while ((dn = *prev) != NULL) {
 		if ((dn->dn_owner == id) && (dn->dn_filp == filp)) {
 			*prev = dn->dn_next;
-			redo_inode_mask(inode);
-			kmem_cache_free(dn_cache, dn);
+			kmem_cache_free(dnotify_struct_cache, dn);
+			dnotify_recalc_inode_mask(entry);
 			break;
 		}
 		prev = &dn->dn_next;
 	}
-	spin_unlock(&inode->i_lock);
+
+	spin_unlock(&entry->lock);
+
+	/* nothing else could have found us thanks to the dnotify_mark_mutex */
+	if (dnentry->dn == NULL)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+
+	fsnotify_put_mark(entry);
+}
+
+/* this conversion is done only at watch creation */
+static __u32 convert_arg(unsigned long arg)
+{
+	__u32 new_mask = FS_EVENT_ON_CHILD;
+
+	if (arg & DN_MULTISHOT)
+		new_mask |= FS_DN_MULTISHOT;
+	if (arg & DN_DELETE)
+		new_mask |= (FS_DELETE | FS_MOVED_FROM);
+	if (arg & DN_MODIFY)
+		new_mask |= FS_MODIFY;
+	if (arg & DN_ACCESS)
+		new_mask |= FS_ACCESS;
+	if (arg & DN_ATTRIB)
+		new_mask |= FS_ATTRIB;
+	if (arg & DN_RENAME)
+		new_mask |= FS_DN_RENAME;
+	if (arg & DN_CREATE)
+		new_mask |= (FS_CREATE | FS_MOVED_TO);
+
+	return new_mask;
 }
 
+/*
+ * If multiple processes watch the same inode with dnotify there is only one
+ * dnotify mark in inode->i_fsnotify_mark_entries but we chain a dnotify_struct
+ * onto that mark.  This function either attaches the new dnotify_struct onto
+ * that list, or it |= the mask onto an existing dnotify_struct.
+ */
+static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark_entry *dnentry,
+		     fl_owner_t id, int fd, struct file *filp, __u32 mask)
+{
+	struct dnotify_struct *odn;
+
+	odn = dnentry->dn;
+	while (odn != NULL) {
+		/* adding more events to existing dnotify_struct? */
+		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
+			odn->dn_fd = fd;
+			odn->dn_mask |= mask;
+			return -EEXIST;
+		}
+		odn = odn->dn_next;
+	}
+
+	dn->dn_mask = mask;
+	dn->dn_fd = fd;
+	dn->dn_filp = filp;
+	dn->dn_owner = id;
+	dn->dn_next = dnentry->dn;
+	dnentry->dn = dn;
+
+	return 0;
+}
+
+/*
+ * When a process calls fcntl to attach a dnotify watch to a directory it ends
+ * up here.  Allocate both a mark for fsnotify to add and a dnotify_struct to be
+ * attached to the fsnotify_mark.
+ */
 int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
 {
+	struct dnotify_mark_entry *new_dnentry, *dnentry;
+	struct fsnotify_mark_entry *new_entry, *entry;
 	struct dnotify_struct *dn;
-	struct dnotify_struct *odn;
-	struct dnotify_struct **prev;
 	struct inode *inode;
 	fl_owner_t id = current->files;
 	struct file *f;
-	int error = 0;
+	int destroy = 0, error = 0;
+	__u32 mask;
+
+	/* we use these to tell if we need to kfree */
+	new_entry = NULL;
+	dn = NULL;
+
+	if (!dir_notify_enable) {
+		error = -EINVAL;
+		goto out_err;
+	}
 
+	/* a 0 mask means we are explicitly removing the watch */
 	if ((arg & ~DN_MULTISHOT) == 0) {
 		dnotify_flush(filp, id);
-		return 0;
+		error = 0;
+		goto out_err;
 	}
-	if (!dir_notify_enable)
-		return -EINVAL;
+
+	/* dnotify only works on directories */
 	inode = filp->f_path.dentry->d_inode;
-	if (!S_ISDIR(inode->i_mode))
-		return -ENOTDIR;
-	dn = kmem_cache_alloc(dn_cache, GFP_KERNEL);
-	if (dn == NULL)
-		return -ENOMEM;
-	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((odn = *prev) != NULL) {
-		if ((odn->dn_owner == id) && (odn->dn_filp == filp)) {
-			odn->dn_fd = fd;
-			odn->dn_mask |= arg;
-			inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-			goto out_free;
-		}
-		prev = &odn->dn_next;
+	if (!S_ISDIR(inode->i_mode)) {
+		error = -ENOTDIR;
+		goto out_err;
 	}
 
-	rcu_read_lock();
-	f = fcheck(fd);
-	rcu_read_unlock();
-	/* we'd lost the race with close(), sod off silently */
-	/* note that inode->i_lock prevents reordering problems
-	 * between accesses to descriptor table and ->i_dnotify */
-	if (f != filp)
-		goto out_free;
+	/* expect most fcntl to add new rather than augment old */
+	dn = kmem_cache_alloc(dnotify_struct_cache, GFP_KERNEL);
+	if (!dn) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-	if (error)
-		goto out_free;
+	/* new fsnotify mark, we expect most fcntl calls to add a new mark */
+	new_dnentry = kmem_cache_alloc(dnotify_mark_entry_cache, GFP_KERNEL);
+	if (!new_dnentry) {
+		error = -ENOMEM;
+		goto out_err;
+	}
 
-	dn->dn_mask = arg;
-	dn->dn_fd = fd;
-	dn->dn_filp = filp;
-	dn->dn_owner = id;
-	inode->i_dnotify_mask |= arg & ~DN_MULTISHOT;
-	dn->dn_next = inode->i_dnotify;
-	inode->i_dnotify = dn;
-	spin_unlock(&inode->i_lock);
-	return 0;
+	/* convert the userspace DN_* "arg" to the internal FS_* defines in fsnotify */
+	mask = convert_arg(arg);
 
-out_free:
-	spin_unlock(&inode->i_lock);
-	kmem_cache_free(dn_cache, dn);
-	return error;
-}
+	/* set up the new_entry and new_dnentry */
+	new_entry = &new_dnentry->fsn_entry;
+	fsnotify_init_mark(new_entry, dnotify_free_mark);
+	new_entry->mask = mask;
+	new_dnentry->dn = NULL;
 
-void __inode_dir_notify(struct inode *inode, unsigned long event)
-{
-	struct dnotify_struct *dn;
-	struct dnotify_struct **prev;
-	struct fown_struct *fown;
-	int changed = 0;
+	/* this is needed to prevent the fcntl/close race described below */
+	mutex_lock(&dnotify_mark_mutex);
 
+	/* add the new_entry or find an old one. */
 	spin_lock(&inode->i_lock);
-	prev = &inode->i_dnotify;
-	while ((dn = *prev) != NULL) {
-		if ((dn->dn_mask & event) == 0) {
-			prev = &dn->dn_next;
-			continue;
-		}
-		fown = &dn->dn_filp->f_owner;
-		send_sigio(fown, dn->dn_fd, POLL_MSG);
-		if (dn->dn_mask & DN_MULTISHOT)
-			prev = &dn->dn_next;
-		else {
-			*prev = dn->dn_next;
-			changed = 1;
-			kmem_cache_free(dn_cache, dn);
-		}
-	}
-	if (changed)
-		redo_inode_mask(inode);
+	entry = fsnotify_find_mark_entry(dnotify_group, inode);
 	spin_unlock(&inode->i_lock);
-}
-
-EXPORT_SYMBOL(__inode_dir_notify);
+	if (entry) {
+		dnentry = container_of(entry, struct dnotify_mark_entry, fsn_entry);
+		spin_lock(&entry->lock);
+	} else {
+		fsnotify_add_mark(new_entry, dnotify_group, inode);
+		spin_lock(&new_entry->lock);
+		entry = new_entry;
+		dnentry = new_dnentry;
+		/* we used new_entry, so don't free it */
+		new_entry = NULL;
+	}
 
-/*
- * This is hopelessly wrong, but unfixable without API changes.  At
- * least it doesn't oops the kernel...
- *
- * To safely access ->d_parent we need to keep d_move away from it.  Use the
- * dentry's d_lock for this.
- */
-void dnotify_parent(struct dentry *dentry, unsigned long event)
-{
-	struct dentry *parent;
+	rcu_read_lock();
+	f = fcheck(fd);
+	rcu_read_unlock();
 
-	if (!dir_notify_enable)
-		return;
+	/* if (f != filp) means that we lost a race and another task/thread
+	 * actually closed the fd we are still playing with before we grabbed
+	 * the dnotify_mark_mutex and entry->lock.  Since closing the fd is the
+	 * only time we clean up the mark entries we need to get our mark off
+	 * the list. */
+	if (f != filp) {
+		/* if we added ourselves, shoot ourselves, it's possible that
+		 * the flush actually did shoot this entry.  That's fine too
+		 * since multiple calls to destroy_mark is perfectly safe, if
+		 * we found a dnentry already attached to the inode, just sod
+		 * off silently as the flush at close time dealt with it.
+		 */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
+	}
 
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
-	if (parent->d_inode->i_dnotify_mask & event) {
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		__inode_dir_notify(parent->d_inode, event);
-		dput(parent);
-	} else {
-		spin_unlock(&dentry->d_lock);
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	if (error) {
+		/* if we added, we must shoot */
+		if (dnentry == new_dnentry)
+			destroy = 1;
+		goto out;
 	}
+
+	error = attach_dn(dn, dnentry, id, fd, filp, mask);
+	/* !error means that we attached the dn to the dnentry, so don't free it */
+	if (!error)
+		dn = NULL;
+	/* -EEXIST means that we didn't add this new dn and used an old one.
+	 * that isn't an error (and the unused dn should be freed) */
+	else if (error == -EEXIST)
+		error = 0;
+
+	dnotify_recalc_inode_mask(entry);
+out:
+	spin_unlock(&entry->lock);
+
+	if (destroy)
+		fsnotify_destroy_mark_by_entry(entry);
+
+	fsnotify_recalc_group_mask(dnotify_group);
+
+	mutex_unlock(&dnotify_mark_mutex);
+	fsnotify_put_mark(entry);
+out_err:
+	if (new_entry)
+		fsnotify_put_mark(new_entry);
+	if (dn)
+		kmem_cache_free(dnotify_struct_cache, dn);
+	return error;
 }
-EXPORT_SYMBOL_GPL(dnotify_parent);
 
 static int __init dnotify_init(void)
 {
-	dn_cache = kmem_cache_create("dnotify_cache",
-		sizeof(struct dnotify_struct), 0, SLAB_PANIC, NULL);
+	dnotify_struct_cache = KMEM_CACHE(dnotify_struct, SLAB_PANIC);
+	dnotify_mark_entry_cache = KMEM_CACHE(dnotify_mark_entry, SLAB_PANIC);
+
+	dnotify_group = fsnotify_obtain_group(DNOTIFY_GROUP_NUM,
+					      0, &dnotify_fsnotify_ops);
+	if (IS_ERR(dnotify_group))
+		panic("unable to allocate fsnotify group for dnotify\n");
 	return 0;
 }
 
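For reference, this is the userspace contract that the rewritten fcntl_dirnotify() keeps intact. A minimal sketch (watching /tmp and using SIGRTMIN+1 are arbitrary choices, not anything this patch prescribes):

#define _GNU_SOURCE
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void on_dir_event(int sig, siginfo_t *si, void *ctx)
{
	/* si->si_fd names the watched directory fd that fired */
}

int main(void)
{
	struct sigaction sa = { .sa_sigaction = on_dir_event,
				.sa_flags = SA_SIGINFO };
	int fd = open("/tmp", O_RDONLY);

	sigaction(SIGRTMIN + 1, &sa, NULL);
	fcntl(fd, F_SETSIG, SIGRTMIN + 1);
	/* convert_arg() above maps these DN_* bits to FS_* internally;
	 * DN_MULTISHOT keeps the watch armed after the first event */
	fcntl(fd, F_NOTIFY, DN_CREATE | DN_DELETE | DN_MULTISHOT);

	pause();
	return 0;
}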
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
new file mode 100644
index 000000000000..ec2f7bd76818
--- /dev/null
+++ b/fs/notify/fsnotify.c
@@ -0,0 +1,186 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/dcache.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/srcu.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+/*
+ * Clear all of the marks on an inode when it is being evicted from core
+ */
+void __fsnotify_inode_delete(struct inode *inode)
+{
+	fsnotify_clear_marks_by_inode(inode);
+}
+EXPORT_SYMBOL_GPL(__fsnotify_inode_delete);
+
+/*
+ * Given an inode, first check if we care what happens to our children.  Inotify
+ * and dnotify both tell their parents about events.  If we care about any event
+ * on a child we run all of our children and set a dentry flag saying that the
+ * parent cares.  Thus when an event happens on a child it can quickly tell
+ * if there is a need to find a parent and send the event to the parent.
+ */
+void __fsnotify_update_child_dentry_flags(struct inode *inode)
+{
+	struct dentry *alias;
+	int watched;
+
+	if (!S_ISDIR(inode->i_mode))
+		return;
+
+	/* determine if the children should tell inode about their events */
+	watched = fsnotify_inode_watches_children(inode);
+
+	spin_lock(&dcache_lock);
+	/* run all of the dentries associated with this inode.  Since this is a
+	 * directory, there damn well better only be one item on this list */
+	list_for_each_entry(alias, &inode->i_dentry, d_alias) {
+		struct dentry *child;
+
+		/* run all of the children of the original inode and fix their
+		 * d_flags to indicate parental interest (their parent is the
+		 * original inode) */
+		list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+			if (!child->d_inode)
+				continue;
+
+			spin_lock(&child->d_lock);
+			if (watched)
+				child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
+			else
+				child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
+			spin_unlock(&child->d_lock);
+		}
+	}
+	spin_unlock(&dcache_lock);
+}
+
+/* Notify this dentry's parent about a child's events. */
+void __fsnotify_parent(struct dentry *dentry, __u32 mask)
+{
+	struct dentry *parent;
+	struct inode *p_inode;
+	bool send = false;
+	bool should_update_children = false;
+
+	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
+		return;
+
+	spin_lock(&dentry->d_lock);
+	parent = dentry->d_parent;
+	p_inode = parent->d_inode;
+
+	if (fsnotify_inode_watches_children(p_inode)) {
+		if (p_inode->i_fsnotify_mask & mask) {
+			dget(parent);
+			send = true;
+		}
+	} else {
+		/*
+		 * The parent doesn't care about events on its children but
+		 * at least one child thought it did.  We need to run all the
+		 * children and update their d_flags to let them know p_inode
+		 * doesn't care about them any more.
+		 */
+		dget(parent);
+		should_update_children = true;
+	}
+
+	spin_unlock(&dentry->d_lock);
+
+	if (send) {
+		/* we are notifying a parent so come up with the new mask which
+		 * specifies these are events which came from a child. */
+		mask |= FS_EVENT_ON_CHILD;
+
+		fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+			 dentry->d_name.name, 0);
+		dput(parent);
+	}
+
+	if (unlikely(should_update_children)) {
+		__fsnotify_update_child_dentry_flags(p_inode);
+		dput(parent);
+	}
+}
+EXPORT_SYMBOL_GPL(__fsnotify_parent);
+
+/*
+ * This is the main call to fsnotify.  The VFS calls into hook specific functions
+ * in linux/fsnotify.h.  Those functions then in turn call here.  This function
+ * then calls out to all of the registered fsnotify_groups.  Those groups can
+ * use the notification event in whatever means they feel necessary.
+ */
+void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const char *file_name, u32 cookie)
+{
+	struct fsnotify_group *group;
+	struct fsnotify_event *event = NULL;
+	int idx;
+	/* global tests shouldn't care about events on child only the specific event */
+	__u32 test_mask = (mask & ~FS_EVENT_ON_CHILD);
+
+	if (list_empty(&fsnotify_groups))
+		return;
+
+	if (!(test_mask & fsnotify_mask))
+		return;
+
+	if (!(test_mask & to_tell->i_fsnotify_mask))
+		return;
+	/*
+	 * SRCU!!  the groups list is very very much read only and the path is
+	 * very hot.  The VAST majority of events are not going to need to do
+	 * anything other than walk the list so it's crazy to pre-allocate.
+	 */
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list) {
+		if (test_mask & group->mask) {
+			if (!group->ops->should_send_event(group, to_tell, mask))
+				continue;
+			if (!event) {
+				event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie);
+				/* shit, we OOM'd and now we can't tell, maybe
+				 * someday someone else will want to do something
+				 * here */
+				if (!event)
+					break;
+			}
+			group->ops->handle_event(group, event);
+		}
+	}
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	/*
+	 * fsnotify_create_event() took a reference so the event can't be cleaned
+	 * up while we are still trying to add it to lists, drop that one.
+	 */
+	if (event)
+		fsnotify_put_event(event);
+}
+EXPORT_SYMBOL_GPL(fsnotify);
+
+static __init int fsnotify_init(void)
+{
+	return init_srcu_struct(&fsnotify_grp_srcu);
+}
+subsys_initcall(fsnotify_init);
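fsnotify() is never called directly by filesystems; the per-operation hooks in include/linux/fsnotify.h (grown by this patch, but not shown in this section) boil down to thin wrappers around __fsnotify_parent() and fsnotify(). A sketch of their shape, with the exact bodies assumed rather than quoted:

/* sketch only; the real hooks also keep the legacy inotify/dnotify
 * calls alive during the transition */
static inline void fsnotify_create(struct inode *dir, struct dentry *dentry)
{
	fsnotify(dir, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE,
		 dentry->d_name.name, 0);
}

static inline void fsnotify_modify(struct dentry *dentry)
{
	/* the parent is told first (__fsnotify_parent ORs in
	 * FS_EVENT_ON_CHILD), then the inode itself */
	__fsnotify_parent(dentry, FS_MODIFY);
	fsnotify(dentry->d_inode, FS_MODIFY, dentry->d_inode,
		 FSNOTIFY_EVENT_INODE, NULL, 0);
}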
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
new file mode 100644
index 000000000000..4dc240824b2d
--- /dev/null
+++ b/fs/notify/fsnotify.h
@@ -0,0 +1,34 @@
+#ifndef __FS_NOTIFY_FSNOTIFY_H_
+#define __FS_NOTIFY_FSNOTIFY_H_
+
+#include <linux/list.h>
+#include <linux/fsnotify.h>
+#include <linux/srcu.h>
+#include <linux/types.h>
+
+/* protects reads of fsnotify_groups */
+extern struct srcu_struct fsnotify_grp_srcu;
+/* all groups which receive fsnotify events */
+extern struct list_head fsnotify_groups;
+/* bitwise OR of all event types (FS_*) across all fsnotify_groups */
+extern __u32 fsnotify_mask;
+
+/* destroy all events sitting in this group's notification queue */
+extern void fsnotify_flush_notify(struct fsnotify_group *group);
+
+/* final kfree of a group */
+extern void fsnotify_final_destroy_group(struct fsnotify_group *group);
+
+/* run the list of all marks associated with inode and flag them to be freed */
+extern void fsnotify_clear_marks_by_inode(struct inode *inode);
+/*
+ * update the dentry->d_flags of all of inode's children to indicate if inode cares
+ * about events that happen to its children.
+ */
+extern void __fsnotify_update_child_dentry_flags(struct inode *inode);
+
+/* allocate and destroy an event holder to attach events to notification/access queues */
+extern struct fsnotify_event_holder *fsnotify_alloc_event_holder(void);
+extern void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder);
+
+#endif	/* __FS_NOTIFY_FSNOTIFY_H_ */
diff --git a/fs/notify/group.c b/fs/notify/group.c
new file mode 100644
index 000000000000..0e1677144bc5
--- /dev/null
+++ b/fs/notify/group.c
@@ -0,0 +1,254 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/srcu.h>
+#include <linux/rculist.h>
+#include <linux/wait.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+#include <asm/atomic.h>
+
+/* protects writes to fsnotify_groups and fsnotify_mask */
+static DEFINE_MUTEX(fsnotify_grp_mutex);
+/* protects reads while running the fsnotify_groups list */
+struct srcu_struct fsnotify_grp_srcu;
+/* all groups registered to receive filesystem notifications */
+LIST_HEAD(fsnotify_groups);
+/* bitwise OR of all events (FS_*) interesting to some group on this system */
+__u32 fsnotify_mask;
+
+/*
+ * When a new group registers or changes its set of interesting events
+ * this function updates the fsnotify_mask to contain all interesting events
+ */
+void fsnotify_recalc_global_mask(void)
+{
+	struct fsnotify_group *group;
+	__u32 mask = 0;
+	int idx;
+
+	idx = srcu_read_lock(&fsnotify_grp_srcu);
+	list_for_each_entry_rcu(group, &fsnotify_groups, group_list)
+		mask |= group->mask;
+	srcu_read_unlock(&fsnotify_grp_srcu, idx);
+	fsnotify_mask = mask;
+}
+
+/*
+ * Update the group->mask by running all of the marks associated with this
+ * group and finding the bitwise | of all of the mark->mask.  If we change
+ * the group->mask we need to update the global mask of events interesting
+ * to the system.
+ */
+void fsnotify_recalc_group_mask(struct fsnotify_group *group)
+{
+	__u32 mask = 0;
+	__u32 old_mask = group->mask;
+	struct fsnotify_mark_entry *entry;
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry(entry, &group->mark_entries, g_list)
+		mask |= entry->mask;
+	spin_unlock(&group->mark_lock);
+
+	group->mask = mask;
+
+	if (old_mask != mask)
+		fsnotify_recalc_global_mask();
+}
+
+/*
+ * Take a reference to a group so things found under the fsnotify_grp_mutex
+ * can't get freed under us
+ */
+static void fsnotify_get_group(struct fsnotify_group *group)
+{
+	atomic_inc(&group->refcnt);
+}
+
+/*
+ * Final freeing of a group
+ */
+void fsnotify_final_destroy_group(struct fsnotify_group *group)
+{
+	/* clear the notification queue of all events */
+	fsnotify_flush_notify(group);
+
+	if (group->ops->free_group_priv)
+		group->ops->free_group_priv(group);
+
+	kfree(group);
+}
+
+/*
+ * Trying to get rid of a group.  We need to first get rid of any outstanding
+ * allocations and then free the group.  Remember that fsnotify_clear_marks_by_group
+ * could miss marks that are being freed by inode and those marks could still
+ * hold a reference to this group (via group->num_marks).  If we get into that
+ * situation, the fsnotify_final_destroy_group will get called when that final
+ * mark is freed.
+ */
+static void fsnotify_destroy_group(struct fsnotify_group *group)
+{
+	/* clear all inode mark entries for this group */
+	fsnotify_clear_marks_by_group(group);
+
+	/* past the point of no return, matches the initial value of 1 */
+	if (atomic_dec_and_test(&group->num_marks))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Remove this group from the global list of groups that will get events
+ * this can be done even if there are still references and things still using
+ * this group.  This just stops the group from getting new events.
+ */
+static void __fsnotify_evict_group(struct fsnotify_group *group)
+{
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	if (group->on_group_list)
+		list_del_rcu(&group->group_list);
+	group->on_group_list = 0;
+}
+
+/*
+ * Called when a group is no longer interested in getting events.  This can be
+ * used if a group is misbehaving or if for some reason a group should no longer
+ * get any filesystem events.
+ */
+void fsnotify_evict_group(struct fsnotify_group *group)
+{
+	mutex_lock(&fsnotify_grp_mutex);
+	__fsnotify_evict_group(group);
+	mutex_unlock(&fsnotify_grp_mutex);
+}
+
+/*
+ * Drop a reference to a group.  Free it if it's through.
+ */
+void fsnotify_put_group(struct fsnotify_group *group)
+{
+	if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex))
+		return;
+
+	/*
+	 * OK, now we know that there's no other users *and* we hold mutex,
+	 * so no new references will appear
+	 */
+	__fsnotify_evict_group(group);
+
+	/*
+	 * now it's off the list, so the only thing we might care about is
+	 * srcu access....
+	 */
+	mutex_unlock(&fsnotify_grp_mutex);
+	synchronize_srcu(&fsnotify_grp_srcu);
+
+	/* and now it is really dead. _Nothing_ could be seeing it */
+	fsnotify_recalc_global_mask();
+	fsnotify_destroy_group(group);
+}
+
+/*
+ * Simply run the fsnotify_groups list and find a group which matches
+ * the given parameters.  If a group is found we take a reference to that
+ * group.
+ */
+static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask,
+						  const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group_iter;
+	struct fsnotify_group *group = NULL;
+
+	BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex));
+
+	list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) {
+		if (group_iter->group_num == group_num) {
+			if ((group_iter->mask == mask) &&
+			    (group_iter->ops == ops)) {
+				fsnotify_get_group(group_iter);
+				group = group_iter;
+			} else
+				group = ERR_PTR(-EEXIST);
+		}
+	}
+	return group;
+}
+
+/*
+ * Either finds an existing group which matches the group_num, mask, and ops or
+ * creates a new group and adds it to the global group list.  In either case we
+ * take a reference for the group returned.
+ */
+struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask,
+					     const struct fsnotify_ops *ops)
+{
+	struct fsnotify_group *group, *tgroup;
+
+	/* very low use, simpler locking if we just always alloc */
+	group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	atomic_set(&group->refcnt, 1);
+
+	group->on_group_list = 0;
+	group->group_num = group_num;
+	group->mask = mask;
+
+	mutex_init(&group->notification_mutex);
+	INIT_LIST_HEAD(&group->notification_list);
+	init_waitqueue_head(&group->notification_waitq);
+	group->q_len = 0;
+	group->max_events = UINT_MAX;
+
+	spin_lock_init(&group->mark_lock);
+	atomic_set(&group->num_marks, 0);
+	INIT_LIST_HEAD(&group->mark_entries);
+
+	group->ops = ops;
+
+	mutex_lock(&fsnotify_grp_mutex);
+	tgroup = fsnotify_find_group(group_num, mask, ops);
+	if (tgroup) {
+		/* group already exists */
+		mutex_unlock(&fsnotify_grp_mutex);
+		/* destroy the new one we made */
+		fsnotify_put_group(group);
+		return tgroup;
+	}
+
+	/* group not found, add a new one */
+	list_add_rcu(&group->group_list, &fsnotify_groups);
+	group->on_group_list = 1;
+	/* being on the fsnotify_groups list holds one num_marks */
+	atomic_inc(&group->num_marks);
+
+	mutex_unlock(&fsnotify_grp_mutex);
+
+	if (mask)
+		fsnotify_recalc_global_mask();
+
+	return group;
+}
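A backend registers exactly once by asking for a group, the same way dnotify_init() does earlier in this patch. A minimal sketch, where MY_GROUP_NUM and the my_* functions are hypothetical placeholders:

static int my_handle_event(struct fsnotify_group *group,
			   struct fsnotify_event *event);
static bool my_should_send_event(struct fsnotify_group *group,
				 struct inode *inode, __u32 mask);

static const struct fsnotify_ops my_ops = {
	.handle_event		= my_handle_event,
	.should_send_event	= my_should_send_event,
};

static struct fsnotify_group *my_group;

static int __init my_backend_init(void)
{
	/* a mask of 0 is fine here: marks added later widen the group
	 * mask through fsnotify_recalc_group_mask() */
	my_group = fsnotify_obtain_group(MY_GROUP_NUM, 0, &my_ops);
	if (IS_ERR(my_group))
		return PTR_ERR(my_group);
	return 0;
}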
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
new file mode 100644
index 000000000000..c8a07c65482b
--- /dev/null
+++ b/fs/notify/inode_mark.c
@@ -0,0 +1,426 @@
+/*
+ *  Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * fsnotify inode mark locking, lifetime, and refcounting
+ *
+ * REFCNT:
+ * The mark->refcnt tells how many "things" in the kernel currently are
+ * referencing this object.  The object typically will live inside the kernel
+ * with a refcnt of 2, one for each list it is on (i_list, g_list).  Any task
+ * which can find this object holding the appropriate locks can take a reference
+ * and the object itself is guaranteed to survive until the reference is dropped.
+ *
+ * LOCKING:
+ * There are 3 spinlocks involved with fsnotify inode marks and they MUST
+ * be taken in order as follows:
+ *
+ * entry->lock
+ * group->mark_lock
+ * inode->i_lock
+ *
+ * entry->lock protects 2 things, entry->group and entry->inode.  You must hold
+ * that lock to dereference either of these things (they could be NULL even with
+ * the lock)
+ *
+ * group->mark_lock protects the mark_entries list anchored inside a given group
+ * and each entry is hooked via the g_list.  It also sorta protects the
+ * free_g_list, which when used is anchored by a private list on the stack of the
+ * task which held the group->mark_lock.
+ *
+ * inode->i_lock protects the i_fsnotify_mark_entries list anchored inside a
+ * given inode and each entry is hooked via the i_list.  (and sorta the
+ * free_i_list)
+ *
+ *
+ * LIFETIME:
+ * Inode marks survive between when they are added to an inode and when their
+ * refcnt==0.
+ *
+ * The inode mark can be cleared for a number of different reasons including:
+ * - The inode is unlinked for the last time.  (fsnotify_inode_remove)
+ * - The inode is being evicted from cache.  (fsnotify_inode_delete)
+ * - The fs the inode is on is unmounted.  (fsnotify_inode_delete/fsnotify_unmount_inodes)
+ * - Something explicitly requests that it be removed.  (fsnotify_destroy_mark_by_entry)
+ * - The fsnotify_group associated with the mark is going away and all such marks
+ *   need to be cleaned up.  (fsnotify_clear_marks_by_group)
+ *
+ * Worst case we are given an inode and need to clean up all the marks on that
+ * inode.  We take i_lock and walk the i_fsnotify_mark_entries safely.  For each
+ * mark on the list we take a reference (so the mark can't disappear under us).
+ * We remove that mark from the inode's list of marks and we add this mark to a
+ * private list anchored on the stack using i_free_list;  At this point we no
+ * longer fear anything finding the mark using the inode's list of marks.
+ *
+ * We can safely and locklessly run the private list on the stack of everything
+ * we just unattached from the original inode.  For each mark on the private list
+ * we grab the mark->lock and can thus dereference mark->group and mark->inode.
+ * If we see the group and inode are not NULL we take those locks.  Now holding
+ * all 3 locks we can completely remove the mark from other tasks finding it in
+ * the future.  Remember, 10 things might already be referencing this mark, but
+ * they better be holding a ref.  We drop our reference we took before we unhooked
+ * it from the inode.  When the ref hits 0 we can free the mark.
+ *
+ * Very similarly for freeing by group, except we use free_g_list.
+ *
+ * This has the very interesting property of being able to run concurrently with
+ * any (or all) other directions.
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/writeback.h> /* for inode_lock */
+
+#include <asm/atomic.h>
+
+#include <linux/fsnotify_backend.h>
+#include "fsnotify.h"
+
+void fsnotify_get_mark(struct fsnotify_mark_entry *entry)
+{
+	atomic_inc(&entry->refcnt);
+}
+
+void fsnotify_put_mark(struct fsnotify_mark_entry *entry)
+{
+	if (atomic_dec_and_test(&entry->refcnt))
+		entry->free_mark(entry);
+}
+
+/*
+ * Recalculate the mask of events relevant to a given inode locked.
+ */
+static void fsnotify_recalc_inode_mask_locked(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+	__u32 new_mask = 0;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list)
+		new_mask |= entry->mask;
+	inode->i_fsnotify_mask = new_mask;
+}
+
+/*
+ * Recalculate the inode->i_fsnotify_mask, or the mask of all FS_* event types
+ * any notifier is interested in hearing for this inode.
+ */
+void fsnotify_recalc_inode_mask(struct inode *inode)
+{
+	spin_lock(&inode->i_lock);
+	fsnotify_recalc_inode_mask_locked(inode);
+	spin_unlock(&inode->i_lock);
+
+	__fsnotify_update_child_dentry_flags(inode);
+}
+
+/*
+ * Any time a mark is getting freed we end up here.
+ * The caller had better be holding a reference to this mark so we don't actually
+ * do the final put under the entry->lock
+ */
+void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry)
+{
+	struct fsnotify_group *group;
+	struct inode *inode;
+
+	spin_lock(&entry->lock);
+
+	group = entry->group;
+	inode = entry->inode;
+
+	BUG_ON(group && !inode);
+	BUG_ON(!group && inode);
+
+	/* if !group something else already marked this to die */
+	if (!group) {
+		spin_unlock(&entry->lock);
+		return;
+	}
+
+	/* 1 from caller and 1 for being on i_list/g_list */
+	BUG_ON(atomic_read(&entry->refcnt) < 2);
+
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	hlist_del_init(&entry->i_list);
+	entry->inode = NULL;
+
+	list_del_init(&entry->g_list);
+	entry->group = NULL;
+
+	fsnotify_put_mark(entry); /* for i_list and g_list */
+
+	/*
+	 * this mark is now off the inode->i_fsnotify_mark_entries list and we
+	 * hold the inode->i_lock, so this is the perfect time to update the
+	 * inode->i_fsnotify_mask
+	 */
+	fsnotify_recalc_inode_mask_locked(inode);
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	/*
+	 * Some groups like to know that marks are being freed.  This is a
+	 * callback to the group function to let it know that this entry
+	 * is being freed.
+	 */
+	if (group->ops->freeing_mark)
+		group->ops->freeing_mark(entry, group);
+
+	/*
+	 * __fsnotify_update_child_dentry_flags(inode);
+	 *
+	 * I really want to call that, but we can't, we have no idea if the inode
+	 * still exists the second we drop the entry->lock.
+	 *
+	 * The next time an event arrives at this inode from one of its children
+	 * __fsnotify_parent will see that the inode doesn't care about its
+	 * children and will update all of these flags then.  So really this
+	 * is just a lazy update (and could be a perf win...)
+	 */
+
+
+	iput(inode);
+
+	/*
+	 * it's possible that this group tried to destroy itself, but this
+	 * mark was simultaneously being freed by inode.  If that's the
+	 * case, we finish freeing the group here.
+	 */
+	if (unlikely(atomic_dec_and_test(&group->num_marks)))
+		fsnotify_final_destroy_group(group);
+}
+
+/*
+ * Given a group, destroy all of the marks associated with that group.
+ */
+void fsnotify_clear_marks_by_group(struct fsnotify_group *group)
+{
+	struct fsnotify_mark_entry *lentry, *entry;
+	LIST_HEAD(free_list);
+
+	spin_lock(&group->mark_lock);
+	list_for_each_entry_safe(entry, lentry, &group->mark_entries, g_list) {
+		list_add(&entry->free_g_list, &free_list);
+		list_del_init(&entry->g_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&group->mark_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_g_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * Given an inode, destroy all of the marks associated with that inode.
+ */
+void fsnotify_clear_marks_by_inode(struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry, *lentry;
+	struct hlist_node *pos, *n;
+	LIST_HEAD(free_list);
+
+	spin_lock(&inode->i_lock);
+	hlist_for_each_entry_safe(entry, pos, n, &inode->i_fsnotify_mark_entries, i_list) {
+		list_add(&entry->free_i_list, &free_list);
+		hlist_del_init(&entry->i_list);
+		fsnotify_get_mark(entry);
+	}
+	spin_unlock(&inode->i_lock);
+
+	list_for_each_entry_safe(entry, lentry, &free_list, free_i_list) {
+		fsnotify_destroy_mark_by_entry(entry);
+		fsnotify_put_mark(entry);
+	}
+}
+
+/*
+ * given a group and inode, find the mark associated with that combination.
+ * if found take a reference to that mark and return it, else return NULL
+ */
+struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group,
+						     struct inode *inode)
+{
+	struct fsnotify_mark_entry *entry;
+	struct hlist_node *pos;
+
+	assert_spin_locked(&inode->i_lock);
+
+	hlist_for_each_entry(entry, pos, &inode->i_fsnotify_mark_entries, i_list) {
+		if (entry->group == group) {
+			fsnotify_get_mark(entry);
+			return entry;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Nothing fancy, just initialize lists and locks and counters.
+ */
+void fsnotify_init_mark(struct fsnotify_mark_entry *entry,
+			void (*free_mark)(struct fsnotify_mark_entry *entry))
+
+{
+	spin_lock_init(&entry->lock);
+	atomic_set(&entry->refcnt, 1);
+	INIT_HLIST_NODE(&entry->i_list);
+	entry->group = NULL;
+	entry->mask = 0;
+	entry->inode = NULL;
+	entry->free_mark = free_mark;
+}
+
+/*
+ * Attach an initialized mark entry to a given group and inode.
+ * These marks may be used for the fsnotify backend to determine which
+ * event types should be delivered to which group and for which inodes.
+ */
+int fsnotify_add_mark(struct fsnotify_mark_entry *entry,
+		      struct fsnotify_group *group, struct inode *inode)
+{
+	struct fsnotify_mark_entry *lentry;
+	int ret = 0;
+
+	inode = igrab(inode);
+	if (unlikely(!inode))
+		return -EINVAL;
+
+	/*
+	 * LOCKING ORDER!!!!
+	 * entry->lock
+	 * group->mark_lock
+	 * inode->i_lock
+	 */
+	spin_lock(&entry->lock);
+	spin_lock(&group->mark_lock);
+	spin_lock(&inode->i_lock);
+
+	entry->group = group;
+	entry->inode = inode;
+
+	lentry = fsnotify_find_mark_entry(group, inode);
+	if (!lentry) {
+		hlist_add_head(&entry->i_list, &inode->i_fsnotify_mark_entries);
+		list_add(&entry->g_list, &group->mark_entries);
+
+		fsnotify_get_mark(entry); /* for i_list and g_list */
+
+		atomic_inc(&group->num_marks);
+
+		fsnotify_recalc_inode_mask_locked(inode);
+	}
+
+	spin_unlock(&inode->i_lock);
+	spin_unlock(&group->mark_lock);
+	spin_unlock(&entry->lock);
+
+	if (lentry) {
+		ret = -EEXIST;
+		iput(inode);
+		fsnotify_put_mark(lentry);
+	} else {
+		__fsnotify_update_child_dentry_flags(inode);
+	}
+
+	return ret;
+}
+
+/**
+ * fsnotify_unmount_inodes - an sb is unmounting.  handle any watched inodes.
+ * @list: list of inodes being unmounted (sb->s_inodes)
+ *
+ * Called with inode_lock held, protecting the unmounting super block's list
+ * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
+ * We temporarily drop inode_lock, however, and CAN block.
+ */
+void fsnotify_unmount_inodes(struct list_head *list)
+{
+	struct inode *inode, *next_i, *need_iput = NULL;
+
+	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
+		struct inode *need_iput_tmp;
+
+		/*
+		 * We cannot __iget() an inode in state I_CLEAR, I_FREEING,
+		 * I_WILL_FREE, or I_NEW which is fine because by that point
+		 * the inode cannot have any associated watches.
+		 */
+		if (inode->i_state & (I_CLEAR|I_FREEING|I_WILL_FREE|I_NEW))
+			continue;
+
+		/*
+		 * If i_count is zero, the inode cannot have any watches and
+		 * doing an __iget/iput with MS_ACTIVE clear would actually
+		 * evict all inodes with zero i_count from icache which is
+		 * unnecessarily violent and may in fact be illegal to do.
+		 */
+		if (!atomic_read(&inode->i_count))
+			continue;
+
+		need_iput_tmp = need_iput;
+		need_iput = NULL;
+
+		/* In case fsnotify_inode_delete() drops a reference. */
+		if (inode != need_iput_tmp)
+			__iget(inode);
+		else
+			need_iput_tmp = NULL;
+
+		/* In case the dropping of a reference would nuke next_i. */
+		if ((&next_i->i_sb_list != list) &&
+		    atomic_read(&next_i->i_count) &&
+		    !(next_i->i_state & (I_CLEAR | I_FREEING | I_WILL_FREE))) {
+			__iget(next_i);
+			need_iput = next_i;
+		}
+
+		/*
+		 * We can safely drop inode_lock here because we hold
+		 * references on both inode and next_i.  Also no new inodes
+		 * will be added since the umount has begun.  Finally,
+		 * iprune_mutex keeps shrink_icache_memory() away.
+		 */
+		spin_unlock(&inode_lock);
+
+		if (need_iput_tmp)
+			iput(need_iput_tmp);
+
+		/* for each watch, send FS_UNMOUNT and then remove it */
+		fsnotify(inode, FS_UNMOUNT, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
+
+		fsnotify_inode_delete(inode);
+
+		iput(inode);
+
+		spin_lock(&inode_lock);
+	}
+}
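Tying the mark API together, a backend's add-a-watch path looks roughly like the following sketch. Here my_group and the standalone my_free_mark are hypothetical; dnotify and inotify in this patch instead embed the entry in a larger per-backend struct:

static void my_free_mark(struct fsnotify_mark_entry *entry)
{
	kfree(entry);		/* called once the refcnt drops to zero */
}

static int watch_inode(struct fsnotify_group *my_group, struct inode *inode)
{
	struct fsnotify_mark_entry *entry;
	int ret;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	fsnotify_init_mark(entry, my_free_mark);	/* refcnt starts at 1 */
	entry->mask = FS_CREATE | FS_DELETE;

	/* takes entry->lock, group->mark_lock, inode->i_lock in that order */
	ret = fsnotify_add_mark(entry, my_group, inode);
	if (ret) {
		/* -EEXIST: this group already has a mark on this inode */
		fsnotify_put_mark(entry);
		return ret;
	}
	/* the mark now holds references for i_list and g_list; dropping the
	 * creation reference leaves it alive on those lists */
	fsnotify_put_mark(entry);
	return 0;
}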
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index 446792841023..5356884289a1 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,26 +1,30 @@
 config INOTIFY
 	bool "Inotify file change notification support"
-	default y
+	default n
 	---help---
-	  Say Y here to enable inotify support.  Inotify is a file change
-	  notification system and a replacement for dnotify.  Inotify fixes
-	  numerous shortcomings in dnotify and introduces several new features
-	  including multiple file events, one-shot support, and unmount
-	  notification.
+	  Say Y here to enable legacy in kernel inotify support.  Inotify is a
+	  file change notification system.  It is a replacement for dnotify.
+	  This option only provides the legacy inotify in kernel API.  There
+	  are no in tree kernel users of this interface since it is deprecated.
+	  You only need this if you are loading an out of tree kernel module
+	  that uses inotify.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config INOTIFY_USER
 	bool "Inotify support for userspace"
-	depends on INOTIFY
+	depends on FSNOTIFY
 	default y
 	---help---
 	  Say Y here to enable inotify support for userspace, including the
 	  associated system calls.  Inotify allows monitoring of both files and
 	  directories via a single open fd.  Events are read from the file
 	  descriptor, which is also select()- and poll()-able.
+	  Inotify fixes numerous shortcomings in dnotify and introduces several
+	  new features including multiple file events, one-shot support, and
+	  unmount notification.
 
 	  For more information, see <file:Documentation/filesystems/inotify.txt>
 
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index e290f3bb9d8d..943828171362 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1,2 @@
 obj-$(CONFIG_INOTIFY)		+= inotify.o
-obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+obj-$(CONFIG_INOTIFY_USER)	+= inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
index 220c13f0d73d..40b1cf914ccb 100644
--- a/fs/notify/inotify/inotify.c
+++ b/fs/notify/inotify/inotify.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/writeback.h>
 #include <linux/inotify.h>
+#include <linux/fsnotify_backend.h>
 
 static atomic_t inotify_cookie;
 
@@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch);
905 */ 906 */
906static int __init inotify_setup(void) 907static int __init inotify_setup(void)
907{ 908{
909 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
910 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
911 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
912 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
913 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
914 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
915 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
916 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
917 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
918 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
919 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
920 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
921 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
922
923 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
924 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
925 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
926 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
927
908 atomic_set(&inotify_cookie, 0); 928 atomic_set(&inotify_cookie, 0);
909 929
910 return 0; 930 return 0;
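
The BUILD_BUG_ON() block pins the userspace-visible IN_* values to the new in-kernel FS_* values, so masks pass between inotify and fsnotify without translation and any future divergence breaks the build instead of the ABI. The idiom works by forcing a negative array size when the condition is true; a simplified sketch (the kernel's real macro differs in detail):

	/* simplified sketch of the BUILD_BUG_ON idiom */
	#define BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)]))

	#define IN_ACCESS 0x00000001
	#define FS_ACCESS 0x00000001

	void check_abi(void)
	{
		BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); /* fine: condition is false */
		/* BUILD_BUG_ON(IN_ACCESS != 0x2); would not compile: char[-1] */
	}
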
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
new file mode 100644
index 000000000000..ea2605a58b8a
--- /dev/null
+++ b/fs/notify/inotify/inotify.h
@@ -0,0 +1,21 @@
1#include <linux/fsnotify_backend.h>
2#include <linux/inotify.h>
3#include <linux/slab.h> /* struct kmem_cache */
4
5extern struct kmem_cache *event_priv_cachep;
6
7struct inotify_event_private_data {
8 struct fsnotify_event_private_data fsnotify_event_priv_data;
9 int wd;
10};
11
12struct inotify_inode_mark_entry {
13 /* fsnotify_mark_entry MUST be the first thing */
14 struct fsnotify_mark_entry fsn_entry;
15 int wd;
16};
17
18extern void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
20
21extern const struct fsnotify_ops inotify_fsnotify_ops;
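
Note the layout contract in inotify_inode_mark_entry: the generic mark is embedded as the first member. container_of() recovers the wrapper regardless of where the member sits, but keeping it first also makes a plain cast legal, which inotify_free_mark() in inotify_user.c takes advantage of. A small sketch, assuming this header is included:

	#include <linux/kernel.h>	/* container_of() */
	#include "inotify.h"

	static struct inotify_inode_mark_entry *
	to_ientry(struct fsnotify_mark_entry *entry)
	{
		/* correct wherever fsn_entry sits inside the wrapper */
		return container_of(entry, struct inotify_inode_mark_entry,
				    fsn_entry);
		/* because fsn_entry is the FIRST member, this is equivalent:
		 * return (struct inotify_inode_mark_entry *)entry; */
	}
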
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
new file mode 100644
index 000000000000..7ef75b83247e
--- /dev/null
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -0,0 +1,138 @@
1/*
2 * fs/notify/inotify/inotify_fsnotify.c - fsnotify backend for the inotify userspace interface
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Copyright (C) 2005 John McCutchan
9 * Copyright 2006 Hewlett-Packard Development Company, L.P.
10 *
11 * Copyright (C) 2009 Eric Paris <Red Hat Inc>
12 * inotify was largely rewritten to make use of the fsnotify infrastructure
13 *
14 * This program is free software; you can redistribute it and/or modify it
15 * under the terms of the GNU General Public License as published by the
16 * Free Software Foundation; either version 2, or (at your option) any
17 * later version.
18 *
19 * This program is distributed in the hope that it will be useful, but
20 * WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 * General Public License for more details.
23 */
24
25#include <linux/fs.h> /* struct inode */
26#include <linux/fsnotify_backend.h>
27#include <linux/inotify.h>
28#include <linux/path.h> /* struct path */
29#include <linux/slab.h> /* kmem_* */
30#include <linux/types.h>
31
32#include "inotify.h"
33
34static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event)
35{
36 struct fsnotify_mark_entry *entry;
37 struct inotify_inode_mark_entry *ientry;
38 struct inode *to_tell;
39 struct inotify_event_private_data *event_priv;
40 struct fsnotify_event_private_data *fsn_event_priv;
41 int wd, ret;
42
43 to_tell = event->to_tell;
44
45 spin_lock(&to_tell->i_lock);
46 entry = fsnotify_find_mark_entry(group, to_tell);
47 spin_unlock(&to_tell->i_lock);
48 /* race with watch removal? We already passed should_send */
49 if (unlikely(!entry))
50 return 0;
51 ientry = container_of(entry, struct inotify_inode_mark_entry,
52 fsn_entry);
53 wd = ientry->wd;
54
55 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
56 if (unlikely(!event_priv))
57 return -ENOMEM;
58
59 fsn_event_priv = &event_priv->fsnotify_event_priv_data;
60
61 fsn_event_priv->group = group;
62 event_priv->wd = wd;
63
64 ret = fsnotify_add_notify_event(group, event, fsn_event_priv);
65 /* EEXIST is not an error */
66 if (ret == -EEXIST)
67 ret = 0;
68
69 /* did event_priv get attached? */
70 if (list_empty(&fsn_event_priv->event_list))
71 inotify_free_event_priv(fsn_event_priv);
72
73 /*
74 * Hold the entry until after the event is on the queue so that an
75 * IN_IGNORED event cannot overtake this event in the queue
76 */
77 fsnotify_put_mark(entry);
78
79 return ret;
80}
81
82static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
83{
84 inotify_destroy_mark_entry(entry, group);
85}
86
87static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask)
88{
89 struct fsnotify_mark_entry *entry;
90 bool send;
91
92 spin_lock(&inode->i_lock);
93 entry = fsnotify_find_mark_entry(group, inode);
94 spin_unlock(&inode->i_lock);
95 if (!entry)
96 return false;
97
98 mask = (mask & ~FS_EVENT_ON_CHILD);
99 send = (entry->mask & mask);
100
101 /* find took a reference */
102 fsnotify_put_mark(entry);
103
104 return send;
105}
106
107static int idr_callback(int id, void *p, void *data)
108{
109 BUG();
110 return 0;
111}
112
113static void inotify_free_group_priv(struct fsnotify_group *group)
114{
115 /* ideally the idr is empty and we won't hit the BUG in the callback */
116 idr_for_each(&group->inotify_data.idr, idr_callback, NULL);
117 idr_remove_all(&group->inotify_data.idr);
118 idr_destroy(&group->inotify_data.idr);
119}
120
121void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
122{
123 struct inotify_event_private_data *event_priv;
124
125
126 event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
127 fsnotify_event_priv_data);
128
129 kmem_cache_free(event_priv_cachep, event_priv);
130}
131
132const struct fsnotify_ops inotify_fsnotify_ops = {
133 .handle_event = inotify_handle_event,
134 .should_send_event = inotify_should_send_event,
135 .free_group_priv = inotify_free_group_priv,
136 .free_event_priv = inotify_free_event_priv,
137 .freeing_mark = inotify_freeing_mark,
138};
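
Everything inotify-specific reaches the fsnotify core through the ops table above: should_send_event() filters, handle_event() queues, and the remaining hooks tear down backend-private state. A hedged sketch of the smallest conceivable backend (the example_* names are hypothetical, not part of this patch):

	#include <linux/fs.h>
	#include <linux/fsnotify_backend.h>

	static int example_handle_event(struct fsnotify_group *group,
					struct fsnotify_event *event)
	{
		/* a real backend would queue the event with
		 * fsnotify_add_notify_event(), attaching whatever per-group
		 * private data it needs to interpret the event later */
		return 0;
	}

	static bool example_should_send_event(struct fsnotify_group *group,
					      struct inode *inode, __u32 mask)
	{
		return true;	/* accept whatever the group mask lets through */
	}

	static const struct fsnotify_ops example_fsnotify_ops = {
		.handle_event		= example_handle_event,
		.should_send_event	= example_should_send_event,
		/* free_group_priv, free_event_priv and freeing_mark would
		 * tear down any state the backend allocated for itself */
	};
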
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 1634319e2404..982a412ac5bc 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -8,6 +8,9 @@
8 * Copyright (C) 2005 John McCutchan 8 * Copyright (C) 2005 John McCutchan
9 * Copyright 2006 Hewlett-Packard Development Company, L.P. 9 * Copyright 2006 Hewlett-Packard Development Company, L.P.
10 * 10 *
11 * Copyright (C) 2009 Eric Paris <Red Hat Inc>
12 * inotify was largely rewritten to make use of the fsnotify infrastructure
13 *
11 * This program is free software; you can redistribute it and/or modify it 14 * This program is free software; you can redistribute it and/or modify it
12 * under the terms of the GNU General Public License as published by the 15 * under the terms of the GNU General Public License as published by the
13 * Free Software Foundation; either version 2, or (at your option) any 16 * Free Software Foundation; either version 2, or (at your option) any
@@ -19,94 +22,48 @@
19 * General Public License for more details. 22 * General Public License for more details.
20 */ 23 */
21 24
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/slab.h>
25#include <linux/fs.h>
26#include <linux/file.h> 25#include <linux/file.h>
27#include <linux/mount.h> 26#include <linux/fs.h> /* struct inode */
28#include <linux/namei.h> 27#include <linux/fsnotify_backend.h>
29#include <linux/poll.h> 28#include <linux/idr.h>
30#include <linux/init.h> 29#include <linux/init.h> /* module_init */
31#include <linux/list.h>
32#include <linux/inotify.h> 30#include <linux/inotify.h>
31#include <linux/kernel.h> /* roundup() */
32#include <linux/magic.h> /* superblock magic number */
33#include <linux/mount.h> /* mntget */
34#include <linux/namei.h> /* LOOKUP_FOLLOW */
35#include <linux/path.h> /* struct path */
36#include <linux/sched.h> /* struct user */
37#include <linux/slab.h> /* struct kmem_cache */
33#include <linux/syscalls.h> 38#include <linux/syscalls.h>
34#include <linux/magic.h> 39#include <linux/types.h>
40#include <linux/uaccess.h>
41#include <linux/poll.h>
42#include <linux/wait.h>
35 43
36#include <asm/ioctls.h> 44#include "inotify.h"
37 45
38static struct kmem_cache *watch_cachep __read_mostly; 46#include <asm/ioctls.h>
39static struct kmem_cache *event_cachep __read_mostly;
40 47
41static struct vfsmount *inotify_mnt __read_mostly; 48static struct vfsmount *inotify_mnt __read_mostly;
42 49
50/* this just sits here and wastes global memory. It is used to pad userspace messages with zeros */
51static struct inotify_event nul_inotify_event;
52
43/* these are configurable via /proc/sys/fs/inotify/ */ 53/* these are configurable via /proc/sys/fs/inotify/ */
44static int inotify_max_user_instances __read_mostly; 54static int inotify_max_user_instances __read_mostly;
45static int inotify_max_user_watches __read_mostly;
46static int inotify_max_queued_events __read_mostly; 55static int inotify_max_queued_events __read_mostly;
56int inotify_max_user_watches __read_mostly;
47 57
48/* 58static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
49 * Lock ordering: 59struct kmem_cache *event_priv_cachep __read_mostly;
50 * 60static struct fsnotify_event *inotify_ignored_event;
51 * inotify_dev->up_mutex (ensures we don't re-add the same watch)
52 * inode->inotify_mutex (protects inode's watch list)
53 * inotify_handle->mutex (protects inotify_handle's watch list)
54 * inotify_dev->ev_mutex (protects device's event queue)
55 */
56 61
57/* 62/*
58 * Lifetimes of the main data structures: 63 * When inotify registers a new group it increments this and uses that
59 * 64 * value as an offset to set the fsnotify group "name" and priority.
60 * inotify_device: Lifetime is managed by reference count, from
61 * sys_inotify_init() until release. Additional references can bump the count
62 * via get_inotify_dev() and drop the count via put_inotify_dev().
63 *
64 * inotify_user_watch: Lifetime is from create_watch() to the receipt of an
65 * IN_IGNORED event from inotify, or when using IN_ONESHOT, to receipt of the
66 * first event, or to inotify_destroy().
67 */ 65 */
68 66static atomic_t inotify_grp_num;
69/*
70 * struct inotify_device - represents an inotify instance
71 *
72 * This structure is protected by the mutex 'mutex'.
73 */
74struct inotify_device {
75 wait_queue_head_t wq; /* wait queue for i/o */
76 struct mutex ev_mutex; /* protects event queue */
77 struct mutex up_mutex; /* synchronizes watch updates */
78 struct list_head events; /* list of queued events */
79 struct user_struct *user; /* user who opened this dev */
80 struct inotify_handle *ih; /* inotify handle */
81 struct fasync_struct *fa; /* async notification */
82 atomic_t count; /* reference count */
83 unsigned int queue_size; /* size of the queue (bytes) */
84 unsigned int event_count; /* number of pending events */
85 unsigned int max_events; /* maximum number of events */
86};
87
88/*
89 * struct inotify_kernel_event - An inotify event, originating from a watch and
90 * queued for user-space. A list of these is attached to each instance of the
91 * device. In read(), this list is walked and all events that can fit in the
92 * buffer are returned.
93 *
94 * Protected by dev->ev_mutex of the device in which we are queued.
95 */
96struct inotify_kernel_event {
97 struct inotify_event event; /* the user-space event */
98 struct list_head list; /* entry in inotify_device's list */
99 char *name; /* filename, if any */
100};
101
102/*
103 * struct inotify_user_watch - our version of an inotify_watch, we add
104 * a reference to the associated inotify_device.
105 */
106struct inotify_user_watch {
107 struct inotify_device *dev; /* associated device */
108 struct inotify_watch wdata; /* inotify watch data */
109};
110 67
111#ifdef CONFIG_SYSCTL 68#ifdef CONFIG_SYSCTL
112 69
@@ -149,280 +106,36 @@ ctl_table inotify_table[] = {
149}; 106};
150#endif /* CONFIG_SYSCTL */ 107#endif /* CONFIG_SYSCTL */
151 108
152static inline void get_inotify_dev(struct inotify_device *dev) 109static inline __u32 inotify_arg_to_mask(u32 arg)
153{
154 atomic_inc(&dev->count);
155}
156
157static inline void put_inotify_dev(struct inotify_device *dev)
158{
159 if (atomic_dec_and_test(&dev->count)) {
160 atomic_dec(&dev->user->inotify_devs);
161 free_uid(dev->user);
162 kfree(dev);
163 }
164}
165
166/*
167 * free_inotify_user_watch - cleans up the watch and its references
168 */
169static void free_inotify_user_watch(struct inotify_watch *w)
170{
171 struct inotify_user_watch *watch;
172 struct inotify_device *dev;
173
174 watch = container_of(w, struct inotify_user_watch, wdata);
175 dev = watch->dev;
176
177 atomic_dec(&dev->user->inotify_watches);
178 put_inotify_dev(dev);
179 kmem_cache_free(watch_cachep, watch);
180}
181
182/*
183 * kernel_event - create a new kernel event with the given parameters
184 *
185 * This function can sleep.
186 */
187static struct inotify_kernel_event * kernel_event(s32 wd, u32 mask, u32 cookie,
188 const char *name)
189{
190 struct inotify_kernel_event *kevent;
191
192 kevent = kmem_cache_alloc(event_cachep, GFP_NOFS);
193 if (unlikely(!kevent))
194 return NULL;
195
196 /* we hand this out to user-space, so zero it just in case */
197 memset(&kevent->event, 0, sizeof(struct inotify_event));
198
199 kevent->event.wd = wd;
200 kevent->event.mask = mask;
201 kevent->event.cookie = cookie;
202
203 INIT_LIST_HEAD(&kevent->list);
204
205 if (name) {
206 size_t len, rem, event_size = sizeof(struct inotify_event);
207
208 /*
209 * We need to pad the filename so as to properly align an
210 * array of inotify_event structures. Because the structure is
211 * small and the common case is a small filename, we just round
212 * up to the next multiple of the structure's sizeof. This is
213 * simple and safe for all architectures.
214 */
215 len = strlen(name) + 1;
216 rem = event_size - len;
217 if (len > event_size) {
218 rem = event_size - (len % event_size);
219 if (len % event_size == 0)
220 rem = 0;
221 }
222
223 kevent->name = kmalloc(len + rem, GFP_NOFS);
224 if (unlikely(!kevent->name)) {
225 kmem_cache_free(event_cachep, kevent);
226 return NULL;
227 }
228 memcpy(kevent->name, name, len);
229 if (rem)
230 memset(kevent->name + len, 0, rem);
231 kevent->event.len = len + rem;
232 } else {
233 kevent->event.len = 0;
234 kevent->name = NULL;
235 }
236
237 return kevent;
238}
239
240/*
241 * inotify_dev_get_event - return the next event in the given dev's queue
242 *
243 * Caller must hold dev->ev_mutex.
244 */
245static inline struct inotify_kernel_event *
246inotify_dev_get_event(struct inotify_device *dev)
247{
248 return list_entry(dev->events.next, struct inotify_kernel_event, list);
249}
250
251/*
252 * inotify_dev_get_last_event - return the last event in the given dev's queue
253 *
254 * Caller must hold dev->ev_mutex.
255 */
256static inline struct inotify_kernel_event *
257inotify_dev_get_last_event(struct inotify_device *dev)
258{ 110{
259 if (list_empty(&dev->events)) 111 __u32 mask;
260 return NULL;
261 return list_entry(dev->events.prev, struct inotify_kernel_event, list);
262}
263 112
264/* 113 /* everything should accept their own ignored and cares about children */
265 * inotify_dev_queue_event - event handler registered with core inotify, adds 114 mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD);
266 * a new event to the given device
267 *
268 * Can sleep (calls kernel_event()).
269 */
270static void inotify_dev_queue_event(struct inotify_watch *w, u32 wd, u32 mask,
271 u32 cookie, const char *name,
272 struct inode *ignored)
273{
274 struct inotify_user_watch *watch;
275 struct inotify_device *dev;
276 struct inotify_kernel_event *kevent, *last;
277 115
278 watch = container_of(w, struct inotify_user_watch, wdata); 116 /* mask off the flags used to open the fd */
279 dev = watch->dev; 117 mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT));
280 118
281 mutex_lock(&dev->ev_mutex); 119 return mask;
282
283 /* we can safely put the watch as we don't reference it while
284 * generating the event
285 */
286 if (mask & IN_IGNORED || w->mask & IN_ONESHOT)
287 put_inotify_watch(w); /* final put */
288
289 /* coalescing: drop this event if it is a dupe of the previous */
290 last = inotify_dev_get_last_event(dev);
291 if (last && last->event.mask == mask && last->event.wd == wd &&
292 last->event.cookie == cookie) {
293 const char *lastname = last->name;
294
295 if (!name && !lastname)
296 goto out;
297 if (name && lastname && !strcmp(lastname, name))
298 goto out;
299 }
300
301 /* the queue overflowed and we already sent the Q_OVERFLOW event */
302 if (unlikely(dev->event_count > dev->max_events))
303 goto out;
304
305 /* if the queue overflows, we need to notify user space */
306 if (unlikely(dev->event_count == dev->max_events))
307 kevent = kernel_event(-1, IN_Q_OVERFLOW, cookie, NULL);
308 else
309 kevent = kernel_event(wd, mask, cookie, name);
310
311 if (unlikely(!kevent))
312 goto out;
313
314 /* queue the event and wake up anyone waiting */
315 dev->event_count++;
316 dev->queue_size += sizeof(struct inotify_event) + kevent->event.len;
317 list_add_tail(&kevent->list, &dev->events);
318 wake_up_interruptible(&dev->wq);
319 kill_fasync(&dev->fa, SIGIO, POLL_IN);
320
321out:
322 mutex_unlock(&dev->ev_mutex);
323}
324
325/*
326 * remove_kevent - cleans up the given kevent
327 *
328 * Caller must hold dev->ev_mutex.
329 */
330static void remove_kevent(struct inotify_device *dev,
331 struct inotify_kernel_event *kevent)
332{
333 list_del(&kevent->list);
334
335 dev->event_count--;
336 dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len;
337}
338
339/*
340 * free_kevent - frees the given kevent.
341 */
342static void free_kevent(struct inotify_kernel_event *kevent)
343{
344 kfree(kevent->name);
345 kmem_cache_free(event_cachep, kevent);
346}
347
348/*
349 * inotify_dev_event_dequeue - destroy an event on the given device
350 *
351 * Caller must hold dev->ev_mutex.
352 */
353static void inotify_dev_event_dequeue(struct inotify_device *dev)
354{
355 if (!list_empty(&dev->events)) {
356 struct inotify_kernel_event *kevent;
357 kevent = inotify_dev_get_event(dev);
358 remove_kevent(dev, kevent);
359 free_kevent(kevent);
360 }
361}
362
363/*
364 * find_inode - resolve a user-given path to a specific inode
365 */
366static int find_inode(const char __user *dirname, struct path *path,
367 unsigned flags)
368{
369 int error;
370
371 error = user_path_at(AT_FDCWD, dirname, flags, path);
372 if (error)
373 return error;
374 /* you can only watch an inode if you have read permissions on it */
375 error = inode_permission(path->dentry->d_inode, MAY_READ);
376 if (error)
377 path_put(path);
378 return error;
379} 120}
380 121
381/* 122static inline u32 inotify_mask_to_arg(__u32 mask)
382 * create_watch - creates a watch on the given device.
383 *
384 * Callers must hold dev->up_mutex.
385 */
386static int create_watch(struct inotify_device *dev, struct inode *inode,
387 u32 mask)
388{ 123{
389 struct inotify_user_watch *watch; 124 return mask & (IN_ALL_EVENTS | IN_ISDIR | IN_UNMOUNT | IN_IGNORED |
390 int ret; 125 IN_Q_OVERFLOW);
391
392 if (atomic_read(&dev->user->inotify_watches) >=
393 inotify_max_user_watches)
394 return -ENOSPC;
395
396 watch = kmem_cache_alloc(watch_cachep, GFP_KERNEL);
397 if (unlikely(!watch))
398 return -ENOMEM;
399
400 /* save a reference to device and bump the count to make it official */
401 get_inotify_dev(dev);
402 watch->dev = dev;
403
404 atomic_inc(&dev->user->inotify_watches);
405
406 inotify_init_watch(&watch->wdata);
407 ret = inotify_add_watch(dev->ih, &watch->wdata, inode, mask);
408 if (ret < 0)
409 free_inotify_user_watch(&watch->wdata);
410
411 return ret;
412} 126}
413 127
414/* Device Interface */ 128/* inotify userspace file descriptor functions */
415
416static unsigned int inotify_poll(struct file *file, poll_table *wait) 129static unsigned int inotify_poll(struct file *file, poll_table *wait)
417{ 130{
418 struct inotify_device *dev = file->private_data; 131 struct fsnotify_group *group = file->private_data;
419 int ret = 0; 132 int ret = 0;
420 133
421 poll_wait(file, &dev->wq, wait); 134 poll_wait(file, &group->notification_waitq, wait);
422 mutex_lock(&dev->ev_mutex); 135 mutex_lock(&group->notification_mutex);
423 if (!list_empty(&dev->events)) 136 if (!fsnotify_notify_queue_is_empty(group))
424 ret = POLLIN | POLLRDNORM; 137 ret = POLLIN | POLLRDNORM;
425 mutex_unlock(&dev->ev_mutex); 138 mutex_unlock(&group->notification_mutex);
426 139
427 return ret; 140 return ret;
428} 141}
@@ -432,26 +145,29 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
432 * enough to fit in "count". Return an error pointer if 145 * enough to fit in "count". Return an error pointer if
433 * not large enough. 146 * not large enough.
434 * 147 *
435 * Called with the device ev_mutex held. 148 * Called with the group->notification_mutex held.
436 */ 149 */
437static struct inotify_kernel_event *get_one_event(struct inotify_device *dev, 150static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
438 size_t count) 151 size_t count)
439{ 152{
440 size_t event_size = sizeof(struct inotify_event); 153 size_t event_size = sizeof(struct inotify_event);
441 struct inotify_kernel_event *kevent; 154 struct fsnotify_event *event;
442 155
443 if (list_empty(&dev->events)) 156 if (fsnotify_notify_queue_is_empty(group))
444 return NULL; 157 return NULL;
445 158
446 kevent = inotify_dev_get_event(dev); 159 event = fsnotify_peek_notify_event(group);
447 if (kevent->name) 160
448 event_size += kevent->event.len; 161 event_size += roundup(event->name_len, event_size);
449 162
450 if (event_size > count) 163 if (event_size > count)
451 return ERR_PTR(-EINVAL); 164 return ERR_PTR(-EINVAL);
452 165
453 remove_kevent(dev, kevent); 166 /* held the notification_mutex the whole time, so this is the
454 return kevent; 167 * same event we peeked above */
168 fsnotify_remove_notify_event(group);
169
170 return event;
455} 171}
456 172
457/* 173/*
@@ -460,51 +176,90 @@ static struct inotify_kernel_event *get_one_event(struct inotify_device *dev,
460 * We already checked that the event size is smaller than the 176 * We already checked that the event size is smaller than the
461 * buffer we had in "get_one_event()" above. 177 * buffer we had in "get_one_event()" above.
462 */ 178 */
463static ssize_t copy_event_to_user(struct inotify_kernel_event *kevent, 179static ssize_t copy_event_to_user(struct fsnotify_group *group,
180 struct fsnotify_event *event,
464 char __user *buf) 181 char __user *buf)
465{ 182{
183 struct inotify_event inotify_event;
184 struct fsnotify_event_private_data *fsn_priv;
185 struct inotify_event_private_data *priv;
466 size_t event_size = sizeof(struct inotify_event); 186 size_t event_size = sizeof(struct inotify_event);
187 size_t name_len;
188
189 /* we get the inotify watch descriptor from the event private data */
190 spin_lock(&event->lock);
191 fsn_priv = fsnotify_remove_priv_from_event(group, event);
192 spin_unlock(&event->lock);
193
194 if (!fsn_priv)
195 inotify_event.wd = -1;
196 else {
197 priv = container_of(fsn_priv, struct inotify_event_private_data,
198 fsnotify_event_priv_data);
199 inotify_event.wd = priv->wd;
200 inotify_free_event_priv(fsn_priv);
201 }
202
203 /* round up event->name_len so it is a multiple of event_size */
204 name_len = roundup(event->name_len, event_size);
205 inotify_event.len = name_len;
206
207 inotify_event.mask = inotify_mask_to_arg(event->mask);
208 inotify_event.cookie = event->sync_cookie;
467 209
468 if (copy_to_user(buf, &kevent->event, event_size)) 210 /* send the main event */
211 if (copy_to_user(buf, &inotify_event, event_size))
469 return -EFAULT; 212 return -EFAULT;
470 213
471 if (kevent->name) { 214 buf += event_size;
472 buf += event_size;
473 215
474 if (copy_to_user(buf, kevent->name, kevent->event.len)) 216 /*
217 * fsnotify only stores the pathname, so here we have to send the pathname
218 * and then pad that pathname out to a multiple of sizeof(inotify_event)
219 * with zeros. I get my zeros from the nul_inotify_event.
220 */
221 if (name_len) {
222 unsigned int len_to_zero = name_len - event->name_len;
223 /* copy the path name */
224 if (copy_to_user(buf, event->file_name, event->name_len))
475 return -EFAULT; 225 return -EFAULT;
226 buf += event->name_len;
476 227
477 event_size += kevent->event.len; 228 /* fill userspace with 0's from nul_inotify_event */
229 if (copy_to_user(buf, &nul_inotify_event, len_to_zero))
230 return -EFAULT;
231 buf += len_to_zero;
232 event_size += name_len;
478 } 233 }
234
479 return event_size; 235 return event_size;
480} 236}
481 237
482static ssize_t inotify_read(struct file *file, char __user *buf, 238static ssize_t inotify_read(struct file *file, char __user *buf,
483 size_t count, loff_t *pos) 239 size_t count, loff_t *pos)
484{ 240{
485 struct inotify_device *dev; 241 struct fsnotify_group *group;
242 struct fsnotify_event *kevent;
486 char __user *start; 243 char __user *start;
487 int ret; 244 int ret;
488 DEFINE_WAIT(wait); 245 DEFINE_WAIT(wait);
489 246
490 start = buf; 247 start = buf;
491 dev = file->private_data; 248 group = file->private_data;
492 249
493 while (1) { 250 while (1) {
494 struct inotify_kernel_event *kevent; 251 prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
495 252
496 prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); 253 mutex_lock(&group->notification_mutex);
497 254 kevent = get_one_event(group, count);
498 mutex_lock(&dev->ev_mutex); 255 mutex_unlock(&group->notification_mutex);
499 kevent = get_one_event(dev, count);
500 mutex_unlock(&dev->ev_mutex);
501 256
502 if (kevent) { 257 if (kevent) {
503 ret = PTR_ERR(kevent); 258 ret = PTR_ERR(kevent);
504 if (IS_ERR(kevent)) 259 if (IS_ERR(kevent))
505 break; 260 break;
506 ret = copy_event_to_user(kevent, buf); 261 ret = copy_event_to_user(group, kevent, buf);
507 free_kevent(kevent); 262 fsnotify_put_event(kevent);
508 if (ret < 0) 263 if (ret < 0)
509 break; 264 break;
510 buf += ret; 265 buf += ret;
@@ -525,7 +280,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
525 schedule(); 280 schedule();
526 } 281 }
527 282
528 finish_wait(&dev->wq, &wait); 283 finish_wait(&group->notification_waitq, &wait);
529 if (start != buf && ret != -EFAULT) 284 if (start != buf && ret != -EFAULT)
530 ret = buf - start; 285 ret = buf - start;
531 return ret; 286 return ret;
@@ -533,25 +288,19 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
533 288
534static int inotify_fasync(int fd, struct file *file, int on) 289static int inotify_fasync(int fd, struct file *file, int on)
535{ 290{
536 struct inotify_device *dev = file->private_data; 291 struct fsnotify_group *group = file->private_data;
537 292
538 return fasync_helper(fd, file, on, &dev->fa) >= 0 ? 0 : -EIO; 293 return fasync_helper(fd, file, on, &group->inotify_data.fa) >= 0 ? 0 : -EIO;
539} 294}
540 295
541static int inotify_release(struct inode *ignored, struct file *file) 296static int inotify_release(struct inode *ignored, struct file *file)
542{ 297{
543 struct inotify_device *dev = file->private_data; 298 struct fsnotify_group *group = file->private_data;
544
545 inotify_destroy(dev->ih);
546 299
547 /* destroy all of the events on this device */ 300 fsnotify_clear_marks_by_group(group);
548 mutex_lock(&dev->ev_mutex);
549 while (!list_empty(&dev->events))
550 inotify_dev_event_dequeue(dev);
551 mutex_unlock(&dev->ev_mutex);
552 301
553 /* free this device: the put matching the get in inotify_init() */ 302 /* free this group; the matching get was in inotify_init->fsnotify_obtain_group */
554 put_inotify_dev(dev); 303 fsnotify_put_group(group);
555 304
556 return 0; 305 return 0;
557} 306}
@@ -559,16 +308,27 @@ static int inotify_release(struct inode *ignored, struct file *file)
559static long inotify_ioctl(struct file *file, unsigned int cmd, 308static long inotify_ioctl(struct file *file, unsigned int cmd,
560 unsigned long arg) 309 unsigned long arg)
561{ 310{
562 struct inotify_device *dev; 311 struct fsnotify_group *group;
312 struct fsnotify_event_holder *holder;
313 struct fsnotify_event *event;
563 void __user *p; 314 void __user *p;
564 int ret = -ENOTTY; 315 int ret = -ENOTTY;
316 size_t send_len = 0;
565 317
566 dev = file->private_data; 318 group = file->private_data;
567 p = (void __user *) arg; 319 p = (void __user *) arg;
568 320
569 switch (cmd) { 321 switch (cmd) {
570 case FIONREAD: 322 case FIONREAD:
571 ret = put_user(dev->queue_size, (int __user *) p); 323 mutex_lock(&group->notification_mutex);
324 list_for_each_entry(holder, &group->notification_list, event_list) {
325 event = holder->event;
326 send_len += sizeof(struct inotify_event);
327 send_len += roundup(event->name_len,
328 sizeof(struct inotify_event));
329 }
330 mutex_unlock(&group->notification_mutex);
331 ret = put_user(send_len, (int __user *) p);
572 break; 332 break;
573 } 333 }
574 334
@@ -576,23 +336,233 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
576} 336}
577 337
578static const struct file_operations inotify_fops = { 338static const struct file_operations inotify_fops = {
579 .poll = inotify_poll, 339 .poll = inotify_poll,
580 .read = inotify_read, 340 .read = inotify_read,
581 .fasync = inotify_fasync, 341 .fasync = inotify_fasync,
582 .release = inotify_release, 342 .release = inotify_release,
583 .unlocked_ioctl = inotify_ioctl, 343 .unlocked_ioctl = inotify_ioctl,
584 .compat_ioctl = inotify_ioctl, 344 .compat_ioctl = inotify_ioctl,
585}; 345};
586 346
587static const struct inotify_operations inotify_user_ops = {
588 .handle_event = inotify_dev_queue_event,
589 .destroy_watch = free_inotify_user_watch,
590};
591 347
348/*
349 * find_inode - resolve a user-given path to a specific inode
350 */
351static int inotify_find_inode(const char __user *dirname, struct path *path, unsigned flags)
352{
353 int error;
354
355 error = user_path_at(AT_FDCWD, dirname, flags, path);
356 if (error)
357 return error;
358 /* you can only watch an inode if you have read permissions on it */
359 error = inode_permission(path->dentry->d_inode, MAY_READ);
360 if (error)
361 path_put(path);
362 return error;
363}
364
365/*
366 * When, for whatever reason, inotify is done with a mark (or what used to be a
367 * watch) we need to remove that watch from the idr and we need to send IN_IGNORED
368 * for the given wd.
369 *
370 * There is a bit of recursion here. The loop looks like:
371 * inotify_destroy_mark_entry -> fsnotify_destroy_mark_by_entry ->
372 * inotify_freeing_mark -> inotify_destroy_mark_entry -> restart
373 * But the loop is broken in 2 places. fsnotify_destroy_mark_by_entry sets
374 * entry->group = NULL before the call to inotify_freeing_mark, so the if (egroup)
375 * test below will not call back to fsnotify again. But even if that test wasn't
376 * there this would still be safe since fsnotify_destroy_mark_by_entry() is
377 * safe from recursion.
378 */
379void inotify_destroy_mark_entry(struct fsnotify_mark_entry *entry, struct fsnotify_group *group)
380{
381 struct inotify_inode_mark_entry *ientry;
382 struct inotify_event_private_data *event_priv;
383 struct fsnotify_event_private_data *fsn_event_priv;
384 struct fsnotify_group *egroup;
385 struct idr *idr;
386
387 spin_lock(&entry->lock);
388 egroup = entry->group;
389
390 /* if egroup is set we aren't really done and something might still send events
391 * for this inode; on the callback we'll send the IN_IGNORED */
392 if (egroup) {
393 spin_unlock(&entry->lock);
394 fsnotify_destroy_mark_by_entry(entry);
395 return;
396 }
397 spin_unlock(&entry->lock);
398
399 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
400
401 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
402 if (unlikely(!event_priv))
403 goto skip_send_ignore;
404
405 fsn_event_priv = &event_priv->fsnotify_event_priv_data;
406
407 fsn_event_priv->group = group;
408 event_priv->wd = ientry->wd;
409
410 fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv);
411
412 /* did the private data get added? */
413 if (list_empty(&fsn_event_priv->event_list))
414 inotify_free_event_priv(fsn_event_priv);
415
416skip_send_ignore:
417
418 /* remove this entry from the idr */
419 spin_lock(&group->inotify_data.idr_lock);
420 idr = &group->inotify_data.idr;
421 idr_remove(idr, ientry->wd);
422 spin_unlock(&group->inotify_data.idr_lock);
423
424 /* removed from idr, drop that reference */
425 fsnotify_put_mark(entry);
426}
427
428/* ding dong the mark is dead */
429static void inotify_free_mark(struct fsnotify_mark_entry *entry)
430{
431 struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry;
432
433 kmem_cache_free(inotify_inode_mark_cachep, ientry);
434}
435
436static int inotify_update_watch(struct fsnotify_group *group, struct inode *inode, u32 arg)
437{
438 struct fsnotify_mark_entry *entry = NULL;
439 struct inotify_inode_mark_entry *ientry;
440 int ret = 0;
441 int add = (arg & IN_MASK_ADD);
442 __u32 mask;
443 __u32 old_mask, new_mask;
444
445 /* don't allow invalid bits: we don't want flags set */
446 mask = inotify_arg_to_mask(arg);
447 if (unlikely(!mask))
448 return -EINVAL;
449
450 ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
451 if (unlikely(!ientry))
452 return -ENOMEM;
453 /* we set the mask at the end after attaching it */
454 fsnotify_init_mark(&ientry->fsn_entry, inotify_free_mark);
455 ientry->wd = 0;
456
457find_entry:
458 spin_lock(&inode->i_lock);
459 entry = fsnotify_find_mark_entry(group, inode);
460 spin_unlock(&inode->i_lock);
461 if (entry) {
462 kmem_cache_free(inotify_inode_mark_cachep, ientry);
463 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
464 } else {
465 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) {
466 ret = -ENOSPC;
467 goto out_err;
468 }
469
470 ret = fsnotify_add_mark(&ientry->fsn_entry, group, inode);
471 if (ret == -EEXIST)
472 goto find_entry;
473 else if (ret)
474 goto out_err;
475
476 entry = &ientry->fsn_entry;
477retry:
478 ret = -ENOMEM;
479 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
480 goto out_err;
481
482 spin_lock(&group->inotify_data.idr_lock);
483 /* if entry is added to the idr we keep the reference obtained
484 * through fsnotify_add_mark. Remember to drop this reference
485 * when entry is removed from idr */
486 ret = idr_get_new_above(&group->inotify_data.idr, entry,
487 ++group->inotify_data.last_wd,
488 &ientry->wd);
489 spin_unlock(&group->inotify_data.idr_lock);
490 if (ret) {
491 if (ret == -EAGAIN)
492 goto retry;
493 goto out_err;
494 }
495 atomic_inc(&group->inotify_data.user->inotify_watches);
496 }
497
498 spin_lock(&entry->lock);
499
500 old_mask = entry->mask;
501 if (add) {
502 entry->mask |= mask;
503 new_mask = entry->mask;
504 } else {
505 entry->mask = mask;
506 new_mask = entry->mask;
507 }
508
509 spin_unlock(&entry->lock);
510
511 if (old_mask != new_mask) {
512 /* more bits in old than in new? */
513 int dropped = (old_mask & ~new_mask);
514 /* more bits in this entry than the inode's mask? */
515 int do_inode = (new_mask & ~inode->i_fsnotify_mask);
516 /* more bits in this entry than the group? */
517 int do_group = (new_mask & ~group->mask);
518
519 /* update the inode with this new entry */
520 if (dropped || do_inode)
521 fsnotify_recalc_inode_mask(inode);
522
523 /* update the group mask with the new mask */
524 if (dropped || do_group)
525 fsnotify_recalc_group_mask(group);
526 }
527
528 return ientry->wd;
529
530out_err:
531 /* this isn't supposed to happen; just kill the watch */
532 if (entry) {
533 fsnotify_destroy_mark_by_entry(entry);
534 fsnotify_put_mark(entry);
535 }
536 return ret;
537}
538
539static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
540{
541 struct fsnotify_group *group;
542 unsigned int grp_num;
543
544 /* fsnotify_obtain_group took a reference to the group; we put it when we kill the file in the end */
545 grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
546 group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
547 if (IS_ERR(group))
548 return group;
549
550 group->max_events = max_events;
551
552 spin_lock_init(&group->inotify_data.idr_lock);
553 idr_init(&group->inotify_data.idr);
554 group->inotify_data.last_wd = 0;
555 group->inotify_data.user = user;
556 group->inotify_data.fa = NULL;
557
558 return group;
559}
560
561
562/* inotify syscalls */
592SYSCALL_DEFINE1(inotify_init1, int, flags) 563SYSCALL_DEFINE1(inotify_init1, int, flags)
593{ 564{
594 struct inotify_device *dev; 565 struct fsnotify_group *group;
595 struct inotify_handle *ih;
596 struct user_struct *user; 566 struct user_struct *user;
597 struct file *filp; 567 struct file *filp;
598 int fd, ret; 568 int fd, ret;
@@ -621,45 +591,27 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
621 goto out_free_uid; 591 goto out_free_uid;
622 } 592 }
623 593
624 dev = kmalloc(sizeof(struct inotify_device), GFP_KERNEL); 594 /* fsnotify_obtain_group took a reference to the group; we put it when we kill the file in the end */
625 if (unlikely(!dev)) { 595 group = inotify_new_group(user, inotify_max_queued_events);
626 ret = -ENOMEM; 596 if (IS_ERR(group)) {
597 ret = PTR_ERR(group);
627 goto out_free_uid; 598 goto out_free_uid;
628 } 599 }
629 600
630 ih = inotify_init(&inotify_user_ops);
631 if (IS_ERR(ih)) {
632 ret = PTR_ERR(ih);
633 goto out_free_dev;
634 }
635 dev->ih = ih;
636 dev->fa = NULL;
637
638 filp->f_op = &inotify_fops; 601 filp->f_op = &inotify_fops;
639 filp->f_path.mnt = mntget(inotify_mnt); 602 filp->f_path.mnt = mntget(inotify_mnt);
640 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 603 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
641 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 604 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
642 filp->f_mode = FMODE_READ; 605 filp->f_mode = FMODE_READ;
643 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK); 606 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
644 filp->private_data = dev; 607 filp->private_data = group;
645 608
646 INIT_LIST_HEAD(&dev->events);
647 init_waitqueue_head(&dev->wq);
648 mutex_init(&dev->ev_mutex);
649 mutex_init(&dev->up_mutex);
650 dev->event_count = 0;
651 dev->queue_size = 0;
652 dev->max_events = inotify_max_queued_events;
653 dev->user = user;
654 atomic_set(&dev->count, 0);
655
656 get_inotify_dev(dev);
657 atomic_inc(&user->inotify_devs); 609 atomic_inc(&user->inotify_devs);
610
658 fd_install(fd, filp); 611 fd_install(fd, filp);
659 612
660 return fd; 613 return fd;
661out_free_dev: 614
662 kfree(dev);
663out_free_uid: 615out_free_uid:
664 free_uid(user); 616 free_uid(user);
665 put_filp(filp); 617 put_filp(filp);
@@ -676,8 +628,8 @@ SYSCALL_DEFINE0(inotify_init)
676SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, 628SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
677 u32, mask) 629 u32, mask)
678{ 630{
631 struct fsnotify_group *group;
679 struct inode *inode; 632 struct inode *inode;
680 struct inotify_device *dev;
681 struct path path; 633 struct path path;
682 struct file *filp; 634 struct file *filp;
683 int ret, fput_needed; 635 int ret, fput_needed;
@@ -698,20 +650,20 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname,
698 if (mask & IN_ONLYDIR) 650 if (mask & IN_ONLYDIR)
699 flags |= LOOKUP_DIRECTORY; 651 flags |= LOOKUP_DIRECTORY;
700 652
701 ret = find_inode(pathname, &path, flags); 653 ret = inotify_find_inode(pathname, &path, flags);
702 if (unlikely(ret)) 654 if (ret)
703 goto fput_and_out; 655 goto fput_and_out;
704 656
705 /* inode held in place by reference to path; dev by fget on fd */ 657 /* inode held in place by reference to path; group by fget on fd */
706 inode = path.dentry->d_inode; 658 inode = path.dentry->d_inode;
707 dev = filp->private_data; 659 group = filp->private_data;
708 660
709 mutex_lock(&dev->up_mutex); 661 /* create/update an inode mark */
710 ret = inotify_find_update_watch(dev->ih, inode, mask); 662 ret = inotify_update_watch(group, inode, mask);
711 if (ret == -ENOENT) 663 if (unlikely(ret))
712 ret = create_watch(dev, inode, mask); 664 goto path_put_and_out;
713 mutex_unlock(&dev->up_mutex);
714 665
666path_put_and_out:
715 path_put(&path); 667 path_put(&path);
716fput_and_out: 668fput_and_out:
717 fput_light(filp, fput_needed); 669 fput_light(filp, fput_needed);
@@ -720,9 +672,10 @@ fput_and_out:
720 672
721SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) 673SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
722{ 674{
675 struct fsnotify_group *group;
676 struct fsnotify_mark_entry *entry;
723 struct file *filp; 677 struct file *filp;
724 struct inotify_device *dev; 678 int ret = 0, fput_needed;
725 int ret, fput_needed;
726 679
727 filp = fget_light(fd, &fput_needed); 680 filp = fget_light(fd, &fput_needed);
728 if (unlikely(!filp)) 681 if (unlikely(!filp))
@@ -734,10 +687,20 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
734 goto out; 687 goto out;
735 } 688 }
736 689
737 dev = filp->private_data; 690 group = filp->private_data;
738 691
739 /* we free our watch data when we get IN_IGNORED */ 692 spin_lock(&group->inotify_data.idr_lock);
740 ret = inotify_rm_wd(dev->ih, wd); 693 entry = idr_find(&group->inotify_data.idr, wd);
694 if (unlikely(!entry)) {
695 spin_unlock(&group->inotify_data.idr_lock);
696 ret = -EINVAL;
697 goto out;
698 }
699 fsnotify_get_mark(entry);
700 spin_unlock(&group->inotify_data.idr_lock);
701
702 inotify_destroy_mark_entry(entry, group);
703 fsnotify_put_mark(entry);
741 704
742out: 705out:
743 fput_light(filp, fput_needed); 706 fput_light(filp, fput_needed);
@@ -753,9 +716,9 @@ inotify_get_sb(struct file_system_type *fs_type, int flags,
753} 716}
754 717
755static struct file_system_type inotify_fs_type = { 718static struct file_system_type inotify_fs_type = {
756 .name = "inotifyfs", 719 .name = "inotifyfs",
757 .get_sb = inotify_get_sb, 720 .get_sb = inotify_get_sb,
758 .kill_sb = kill_anon_super, 721 .kill_sb = kill_anon_super,
759}; 722};
760 723
761/* 724/*
@@ -775,18 +738,16 @@ static int __init inotify_user_setup(void)
775 if (IS_ERR(inotify_mnt)) 738 if (IS_ERR(inotify_mnt))
776 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt)); 739 panic("inotify: kern_mount ret %ld!\n", PTR_ERR(inotify_mnt));
777 740
741 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
742 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
743 inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
744 if (!inotify_ignored_event)
745 panic("unable to allocate the inotify ignored event\n");
746
778 inotify_max_queued_events = 16384; 747 inotify_max_queued_events = 16384;
779 inotify_max_user_instances = 128; 748 inotify_max_user_instances = 128;
780 inotify_max_user_watches = 8192; 749 inotify_max_user_watches = 8192;
781 750
782 watch_cachep = kmem_cache_create("inotify_watch_cache",
783 sizeof(struct inotify_user_watch),
784 0, SLAB_PANIC, NULL);
785 event_cachep = kmem_cache_create("inotify_event_cache",
786 sizeof(struct inotify_kernel_event),
787 0, SLAB_PANIC, NULL);
788
789 return 0; 751 return 0;
790} 752}
791
792module_init(inotify_user_setup); 753module_init(inotify_user_setup);
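
After this rewrite a wd is just an idr slot pointing at a mark: inotify_add_watch() allocates it in inotify_update_watch(), inotify_rm_watch() resolves it, and inotify_destroy_mark_entry() queues the IN_IGNORED that tells userspace the wd is dead. From the application side ("/tmp" again as a stand-in path):

	#include <stdio.h>
	#include <unistd.h>
	#include <sys/inotify.h>

	int main(void)
	{
		char buf[4096];
		int fd = inotify_init();
		int wd = inotify_add_watch(fd, "/tmp", IN_CREATE);

		inotify_rm_watch(fd, wd);	/* kernel queues IN_IGNORED */

		if (read(fd, buf, sizeof(buf)) >= (ssize_t)sizeof(struct inotify_event)) {
			struct inotify_event *ev = (struct inotify_event *)buf;
			if (ev->wd == wd && (ev->mask & IN_IGNORED))
				printf("wd %d gone, safe to forget it\n", wd);
		}
		return 0;
	}
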
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
new file mode 100644
index 000000000000..959b73e756fd
--- /dev/null
+++ b/fs/notify/notification.c
@@ -0,0 +1,411 @@
1/*
2 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
7 * any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; see the file COPYING. If not, write to
16 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18
19/*
20 * Basic idea behind the notification queue: An fsnotify group (like inotify)
21 * sends the userspace notification about events asynchronously some time after
22 * the event happened. When inotify gets an event it will need to add that
23 * event to the group's notify queue. Since a single event might need to be on
24 * multiple groups' notification queues we can't add the event directly to each
25 * queue and instead add a small "event_holder" to each queue. This event_holder
26 * has a pointer back to the original event. Since the majority of events are
27 * going to end up on one, and only one, notification queue we embed one
28 * event_holder into each event. This means we have a single allocation instead
29 * of always needing two. If the embedded event_holder is already in use by
30 * another group a new event_holder (from fsnotify_event_holder_cachep) will be
31 * allocated and used.
32 */
33
34#include <linux/fs.h>
35#include <linux/init.h>
36#include <linux/kernel.h>
37#include <linux/list.h>
38#include <linux/module.h>
39#include <linux/mount.h>
40#include <linux/mutex.h>
41#include <linux/namei.h>
42#include <linux/path.h>
43#include <linux/slab.h>
44#include <linux/spinlock.h>
45
46#include <asm/atomic.h>
47
48#include <linux/fsnotify_backend.h>
49#include "fsnotify.h"
50
51static struct kmem_cache *fsnotify_event_cachep;
52static struct kmem_cache *fsnotify_event_holder_cachep;
53/*
54 * This is a magic event we send when the q is too full. Since it doesn't
55 * hold real event information we just keep one system wide and use it any time
56 * hold real event information we just keep one system-wide and use it any time
57 * it is needed. Its refcnt is set to 1 at kernel init time and will never
58 */
59static struct fsnotify_event q_overflow_event;
60static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
61
62/**
63 * fsnotify_get_cookie - return a unique cookie for use in synchronizing events.
64 * Called from fsnotify_move, which is inlined into filesystem modules.
65 */
66u32 fsnotify_get_cookie(void)
67{
68 return atomic_inc_return(&fsnotify_sync_cookie);
69}
70EXPORT_SYMBOL_GPL(fsnotify_get_cookie);
71
72/* return true if the notify queue is empty, false otherwise */
73bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
74{
75 BUG_ON(!mutex_is_locked(&group->notification_mutex));
76 return list_empty(&group->notification_list) ? true : false;
77}
78
79void fsnotify_get_event(struct fsnotify_event *event)
80{
81 atomic_inc(&event->refcnt);
82}
83
84void fsnotify_put_event(struct fsnotify_event *event)
85{
86 if (!event)
87 return;
88
89 if (atomic_dec_and_test(&event->refcnt)) {
90 if (event->data_type == FSNOTIFY_EVENT_PATH)
91 path_put(&event->path);
92
93 BUG_ON(!list_empty(&event->private_data_list));
94
95 kfree(event->file_name);
96 kmem_cache_free(fsnotify_event_cachep, event);
97 }
98}
99
100struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
101{
102 return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
103}
104
105void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
106{
107 kmem_cache_free(fsnotify_event_holder_cachep, holder);
108}
109
110/*
111 * Find the private data that the group previously attached to this event when
112 * the group added the event to the notification queue (fsnotify_add_notify_event)
113 */
114struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
115{
116 struct fsnotify_event_private_data *lpriv;
117 struct fsnotify_event_private_data *priv = NULL;
118
119 assert_spin_locked(&event->lock);
120
121 list_for_each_entry(lpriv, &event->private_data_list, event_list) {
122 if (lpriv->group == group) {
123 priv = lpriv;
124 list_del(&priv->event_list);
125 break;
126 }
127 }
128 return priv;
129}
130
131/*
132 * Check if 2 events contain the same information. We do not compare private data
133 * but at this moment that isn't a problem for any know fsnotify listeners.
134 */
135static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
136{
137 if ((old->mask == new->mask) &&
138 (old->to_tell == new->to_tell) &&
139 (old->data_type == new->data_type)) {
140 switch (old->data_type) {
141 case (FSNOTIFY_EVENT_INODE):
142 if (old->inode == new->inode)
143 return true;
144 break;
145 case (FSNOTIFY_EVENT_PATH):
146 if ((old->path.mnt == new->path.mnt) &&
147 (old->path.dentry == new->path.dentry))
148 return true;
149 case (FSNOTIFY_EVENT_NONE):
150 return true;
151 };
152 }
153 return false;
154}
155
156/*
157 * Add an event to the group notification queue. The group can later pull this
158 * event off the queue to deal with. If the event is successfully added to the
159 * group's notification queue, a reference is taken on event.
160 */
161int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
162 struct fsnotify_event_private_data *priv)
163{
164 struct fsnotify_event_holder *holder = NULL;
165 struct list_head *list = &group->notification_list;
166 struct fsnotify_event_holder *last_holder;
167 struct fsnotify_event *last_event;
168
169 /* easy to tell if priv was attached to the event */
170 INIT_LIST_HEAD(&priv->event_list);
171
172 /*
173 * There is one fsnotify_event_holder embedded inside each fsnotify_event.
174 * Check if we expect to be able to use that holder. If not, alloc a new
175 * holder.
176 * For the overflow event it's possible that something will use the in-
177 * event holder before we get the lock, so we may need to jump back and
178 * alloc a new holder; this can't happen for most events...
179 */
180 if (!list_empty(&event->holder.event_list)) {
181alloc_holder:
182 holder = fsnotify_alloc_event_holder();
183 if (!holder)
184 return -ENOMEM;
185 }
186
187 mutex_lock(&group->notification_mutex);
188
189 if (group->q_len >= group->max_events) {
190 event = &q_overflow_event;
191 /* sorry, no private data on the overflow event */
192 priv = NULL;
193 }
194
195 spin_lock(&event->lock);
196
197 if (list_empty(&event->holder.event_list)) {
198 if (unlikely(holder))
199 fsnotify_destroy_event_holder(holder);
200 holder = &event->holder;
201 } else if (unlikely(!holder)) {
202 /* between the time we checked above and got the lock the in-
203 * event holder was used; go back and get a new one */
204 spin_unlock(&event->lock);
205 mutex_unlock(&group->notification_mutex);
206 goto alloc_holder;
207 }
208
209 if (!list_empty(list)) {
210 last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
211 last_event = last_holder->event;
212 if (event_compare(last_event, event)) {
213 spin_unlock(&event->lock);
214 mutex_unlock(&group->notification_mutex);
215 if (holder != &event->holder)
216 fsnotify_destroy_event_holder(holder);
217 return -EEXIST;
218 }
219 }
220
221 group->q_len++;
222 holder->event = event;
223
224 fsnotify_get_event(event);
225 list_add_tail(&holder->event_list, list);
226 if (priv)
227 list_add_tail(&priv->event_list, &event->private_data_list);
228 spin_unlock(&event->lock);
229 mutex_unlock(&group->notification_mutex);
230
231 wake_up(&group->notification_waitq);
232 return 0;
233}
234
235/*
236 * Remove and return the first event from the notification list. There is a
237 * reference held on this event since it was on the list. It is the responsibility
238 * of the caller to drop this reference.
239 */
240struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
241{
242 struct fsnotify_event *event;
243 struct fsnotify_event_holder *holder;
244
245 BUG_ON(!mutex_is_locked(&group->notification_mutex));
246
247 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
248
249 event = holder->event;
250
251 spin_lock(&event->lock);
252 holder->event = NULL;
253 list_del_init(&holder->event_list);
254 spin_unlock(&event->lock);
255
256 /* holder == &event->holder means we are referenced through the in-event holder */
257 if (holder != &event->holder)
258 fsnotify_destroy_event_holder(holder);
259
260 group->q_len--;
261
262 return event;
263}
264
265/*
266 * This will not remove the event, that must be done with fsnotify_remove_notify_event()
267 */
268struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
269{
270 struct fsnotify_event *event;
271 struct fsnotify_event_holder *holder;
272
273 BUG_ON(!mutex_is_locked(&group->notification_mutex));
274
275 holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
276 event = holder->event;
277
278 return event;
279}
280
281/*
282 * Called when a group is being torn down to clean up any outstanding
283 * event notifications.
284 */
285void fsnotify_flush_notify(struct fsnotify_group *group)
286{
287 struct fsnotify_event *event;
288 struct fsnotify_event_private_data *priv;
289
290 mutex_lock(&group->notification_mutex);
291 while (!fsnotify_notify_queue_is_empty(group)) {
292 event = fsnotify_remove_notify_event(group);
293 /* if they don't implement free_event_priv they better not have attached any */
294 if (group->ops->free_event_priv) {
295 spin_lock(&event->lock);
296 priv = fsnotify_remove_priv_from_event(group, event);
297 spin_unlock(&event->lock);
298 if (priv)
299 group->ops->free_event_priv(priv);
300 }
301 fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
302 }
303 mutex_unlock(&group->notification_mutex);
304}
305
306static void initialize_event(struct fsnotify_event *event)
307{
308 event->holder.event = NULL;
309 INIT_LIST_HEAD(&event->holder.event_list);
310 atomic_set(&event->refcnt, 1);
311
312 spin_lock_init(&event->lock);
313
314 event->path.dentry = NULL;
315 event->path.mnt = NULL;
316 event->inode = NULL;
317 event->data_type = FSNOTIFY_EVENT_NONE;
318
319 INIT_LIST_HEAD(&event->private_data_list);
320
321 event->to_tell = NULL;
322
323 event->file_name = NULL;
324 event->name_len = 0;
325
326 event->sync_cookie = 0;
327}
328
329/*
330 * fsnotify_create_event - Allocate a new event which will be sent to each
331 * group's handle_event function if the group was interested in this
332 * particular event.
333 *
334 * @to_tell the inode which is supposed to receive the event (sometimes a
335 * parent of the inode to which the event happened).
336 * @mask what actually happened.
337 * @data pointer to the object which was actually affected
338 * @data_type flag indicating whether the data is a file, path, inode, or nothing...
339 * @name the filename, if available
340 */
341struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
342 int data_type, const char *name, u32 cookie)
343{
344 struct fsnotify_event *event;
345
346 event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
347 if (!event)
348 return NULL;
349
350 initialize_event(event);
351
352 if (name) {
353 event->file_name = kstrdup(name, GFP_KERNEL);
354 if (!event->file_name) {
355 kmem_cache_free(fsnotify_event_cachep, event);
356 return NULL;
357 }
358 event->name_len = strlen(event->file_name);
359 }
360
361 event->sync_cookie = cookie;
362 event->to_tell = to_tell;
363
364 switch (data_type) {
365 case FSNOTIFY_EVENT_FILE: {
366 struct file *file = data;
367 struct path *path = &file->f_path;
368 event->path.dentry = path->dentry;
369 event->path.mnt = path->mnt;
370 path_get(&event->path);
371 event->data_type = FSNOTIFY_EVENT_PATH;
372 break;
373 }
374 case FSNOTIFY_EVENT_PATH: {
375 struct path *path = data;
376 event->path.dentry = path->dentry;
377 event->path.mnt = path->mnt;
378 path_get(&event->path);
379 event->data_type = FSNOTIFY_EVENT_PATH;
380 break;
381 }
382 case FSNOTIFY_EVENT_INODE:
383 event->inode = data;
384 event->data_type = FSNOTIFY_EVENT_INODE;
385 break;
386 case FSNOTIFY_EVENT_NONE:
387 event->inode = NULL;
388 event->path.dentry = NULL;
389 event->path.mnt = NULL;
390 break;
391 default:
392 BUG();
393 }
394
395 event->mask = mask;
396
397 return event;
398}
399
400__init int fsnotify_notification_init(void)
401{
402 fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
403 fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
404
405 initialize_event(&q_overflow_event);
406 q_overflow_event.mask = FS_Q_OVERFLOW;
407
408 return 0;
409}
410subsys_initcall(fsnotify_notification_init);
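
The preallocated q_overflow_event exists so overflow can be reported without allocating memory under pressure. Conceptually (a sketch, not the patch's exact queueing logic), the enqueue path can substitute it when a group's list is full:

/* inside the enqueue path, assuming max_events is enforced there: */
if (group->q_len >= group->max_events)
	event = &q_overflow_event;	/* static, refcount preinitialized, never freed */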
411
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 15156364d196..97978004338d 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -180,10 +180,12 @@ d_iput: no no no yes
180#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ 180#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
181#define DCACHE_UNHASHED 0x0010 181#define DCACHE_UNHASHED 0x0010
182 182
183#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ 183#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */
184 184
185#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */ 185#define DCACHE_COOKIE 0x0040 /* For use by dcookie subsystem */
186 186
187#define DCACHE_FSNOTIFY_PARENT_WATCHED 0x0080 /* Parent inode is watched by some fsnotify listener */
188
187extern spinlock_t dcache_lock; 189extern spinlock_t dcache_lock;
188extern seqlock_t rename_lock; 190extern seqlock_t rename_lock;
189 191
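
The new DCACHE_FSNOTIFY_PARENT_WATCHED flag lets hot paths decide whether a parent cares before taking any locks or walking to d_parent. A hedged sketch of the gate a helper such as __fsnotify_parent() can apply (illustrative, not the patch's exact code):

static inline bool example_parent_cares(struct dentry *dentry)
{
	/* cheap flag test; skip all parent notification work when clear */
	return dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED;
}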
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index 102a902b4396..ecc06286226d 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -10,7 +10,7 @@
10 10
11struct dnotify_struct { 11struct dnotify_struct {
12 struct dnotify_struct * dn_next; 12 struct dnotify_struct * dn_next;
13 unsigned long dn_mask; 13 __u32 dn_mask;
14 int dn_fd; 14 int dn_fd;
15 struct file * dn_filp; 15 struct file * dn_filp;
16 fl_owner_t dn_owner; 16 fl_owner_t dn_owner;
@@ -21,23 +21,18 @@ struct dnotify_struct {
21 21
22#ifdef CONFIG_DNOTIFY 22#ifdef CONFIG_DNOTIFY
23 23
24extern void __inode_dir_notify(struct inode *, unsigned long); 24#define DNOTIFY_ALL_EVENTS (FS_DELETE | FS_DELETE_CHILD |\
25 FS_MODIFY | FS_MODIFY_CHILD |\
26 FS_ACCESS | FS_ACCESS_CHILD |\
27 FS_ATTRIB | FS_ATTRIB_CHILD |\
28 FS_CREATE | FS_DN_RENAME |\
29 FS_MOVED_FROM | FS_MOVED_TO)
30
25extern void dnotify_flush(struct file *, fl_owner_t); 31extern void dnotify_flush(struct file *, fl_owner_t);
26extern int fcntl_dirnotify(int, struct file *, unsigned long); 32extern int fcntl_dirnotify(int, struct file *, unsigned long);
27extern void dnotify_parent(struct dentry *, unsigned long);
28
29static inline void inode_dir_notify(struct inode *inode, unsigned long event)
30{
31 if (inode->i_dnotify_mask & (event))
32 __inode_dir_notify(inode, event);
33}
34 33
35#else 34#else
36 35
37static inline void __inode_dir_notify(struct inode *inode, unsigned long event)
38{
39}
40
41static inline void dnotify_flush(struct file *filp, fl_owner_t id) 36static inline void dnotify_flush(struct file *filp, fl_owner_t id)
42{ 37{
43} 38}
@@ -47,14 +42,6 @@ static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
47 return -EINVAL; 42 return -EINVAL;
48} 43}
49 44
50static inline void dnotify_parent(struct dentry *dentry, unsigned long event)
51{
52}
53
54static inline void inode_dir_notify(struct inode *inode, unsigned long event)
55{
56}
57
58#endif /* CONFIG_DNOTIFY */ 45#endif /* CONFIG_DNOTIFY */
59 46
60#endif /* __KERNEL __ */ 47#endif /* __KERNEL __ */
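
As the comment in fsnotify_backend.h below notes, dnotify now converts DN_* bits to FS_* bits once, at watch creation. A sketch of what such a conversion could look like; the mapping and the example_ name are illustrative, not the patch's exact table:

static __u32 example_dn_to_fs(unsigned long arg)
{
	__u32 mask = FS_EVENT_ON_CHILD;	/* dnotify always watches children */

	if (arg & DN_ACCESS)
		mask |= FS_ACCESS_CHILD;
	if (arg & DN_MODIFY)
		mask |= FS_MODIFY_CHILD;
	if (arg & DN_ATTRIB)
		mask |= FS_ATTRIB_CHILD;
	if (arg & DN_CREATE)
		mask |= FS_CREATE;
	if (arg & DN_DELETE)
		mask |= FS_DELETE_CHILD;
	if (arg & DN_RENAME)
		mask |= FS_DN_RENAME;
	if (arg & DN_MULTISHOT)
		mask |= FS_DN_MULTISHOT;
	return mask;
}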
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 83d6b4397245..323b5ce474c1 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -755,9 +755,9 @@ struct inode {
755 755
756 __u32 i_generation; 756 __u32 i_generation;
757 757
758#ifdef CONFIG_DNOTIFY 758#ifdef CONFIG_FSNOTIFY
759 unsigned long i_dnotify_mask; /* Directory notify events */ 759 __u32 i_fsnotify_mask; /* all events this inode cares about */
760 struct dnotify_struct *i_dnotify; /* for directory notifications */ 760 struct hlist_head i_fsnotify_mark_entries; /* fsnotify mark entries */
761#endif 761#endif
762 762
763#ifdef CONFIG_INOTIFY 763#ifdef CONFIG_INOTIFY
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 00fbd5b245c9..936f9aa8bb97 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -13,6 +13,7 @@
13 13
14#include <linux/dnotify.h> 14#include <linux/dnotify.h>
15#include <linux/inotify.h> 15#include <linux/inotify.h>
16#include <linux/fsnotify_backend.h>
16#include <linux/audit.h> 17#include <linux/audit.h>
17 18
18/* 19/*
@@ -22,19 +23,45 @@
22static inline void fsnotify_d_instantiate(struct dentry *entry, 23static inline void fsnotify_d_instantiate(struct dentry *entry,
23 struct inode *inode) 24 struct inode *inode)
24{ 25{
26 __fsnotify_d_instantiate(entry, inode);
27
25 inotify_d_instantiate(entry, inode); 28 inotify_d_instantiate(entry, inode);
26} 29}
27 30
31/* Notify this dentry's parent about a child's events. */
32static inline void fsnotify_parent(struct dentry *dentry, __u32 mask)
33{
34 __fsnotify_parent(dentry, mask);
35
36 inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
37}
38
28/* 39/*
29 * fsnotify_d_move - entry has been moved 40 * fsnotify_d_move - entry has been moved
30 * Called with dcache_lock and entry->d_lock held. 41 * Called with dcache_lock and entry->d_lock held.
31 */ 42 */
32static inline void fsnotify_d_move(struct dentry *entry) 43static inline void fsnotify_d_move(struct dentry *entry)
33{ 44{
45 /*
46 * On move we need to update entry->d_flags to indicate if the new parent
47 * cares about events from this entry.
48 */
49 __fsnotify_update_dcache_flags(entry);
50
34 inotify_d_move(entry); 51 inotify_d_move(entry);
35} 52}
36 53
37/* 54/*
55 * fsnotify_link_count - inode's link count changed
56 */
57static inline void fsnotify_link_count(struct inode *inode)
58{
59 inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
60
61 fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
62}
63
64/*
38 * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir 65 * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
39 */ 66 */
40static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, 67static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
@@ -42,42 +69,62 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
42 int isdir, struct inode *target, struct dentry *moved) 69 int isdir, struct inode *target, struct dentry *moved)
43{ 70{
44 struct inode *source = moved->d_inode; 71 struct inode *source = moved->d_inode;
45 u32 cookie = inotify_get_cookie(); 72 u32 in_cookie = inotify_get_cookie();
73 u32 fs_cookie = fsnotify_get_cookie();
74 __u32 old_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_FROM);
75 __u32 new_dir_mask = (FS_EVENT_ON_CHILD | FS_MOVED_TO);
46 76
47 if (old_dir == new_dir) 77 if (old_dir == new_dir)
48 inode_dir_notify(old_dir, DN_RENAME); 78 old_dir_mask |= FS_DN_RENAME;
49 else {
50 inode_dir_notify(old_dir, DN_DELETE);
51 inode_dir_notify(new_dir, DN_CREATE);
52 }
53 79
54 if (isdir) 80 if (isdir) {
55 isdir = IN_ISDIR; 81 isdir = IN_ISDIR;
56 inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, 82 old_dir_mask |= FS_IN_ISDIR;
83 new_dir_mask |= FS_IN_ISDIR;
84 }
85
86 inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir, in_cookie, old_name,
57 source); 87 source);
58 inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, 88 inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, in_cookie, new_name,
59 source); 89 source);
60 90
91 fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE, old_name, fs_cookie);
92 fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE, new_name, fs_cookie);
93
61 if (target) { 94 if (target) {
62 inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); 95 inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL);
63 inotify_inode_is_dead(target); 96 inotify_inode_is_dead(target);
97
98 /* this is really a link_count change not a removal */
99 fsnotify_link_count(target);
64 } 100 }
65 101
66 if (source) { 102 if (source) {
67 inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); 103 inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL);
104 fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE, NULL, 0);
68 } 105 }
69 audit_inode_child(new_name, moved, new_dir); 106 audit_inode_child(new_name, moved, new_dir);
70} 107}
71 108
72/* 109/*
 110 * fsnotify_inode_delete - an inode is being evicted from cache; cleanup is needed
111 */
112static inline void fsnotify_inode_delete(struct inode *inode)
113{
114 __fsnotify_inode_delete(inode);
115}
116
117/*
73 * fsnotify_nameremove - a filename was removed from a directory 118 * fsnotify_nameremove - a filename was removed from a directory
74 */ 119 */
75static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) 120static inline void fsnotify_nameremove(struct dentry *dentry, int isdir)
76{ 121{
122 __u32 mask = FS_DELETE;
123
77 if (isdir) 124 if (isdir)
78 isdir = IN_ISDIR; 125 mask |= FS_IN_ISDIR;
79 dnotify_parent(dentry, DN_DELETE); 126
80 inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); 127 fsnotify_parent(dentry, mask);
81} 128}
82 129
83/* 130/*
@@ -87,14 +134,9 @@ static inline void fsnotify_inoderemove(struct inode *inode)
87{ 134{
88 inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); 135 inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL);
89 inotify_inode_is_dead(inode); 136 inotify_inode_is_dead(inode);
90}
91 137
92/* 138 fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
93 * fsnotify_link_count - inode's link count changed 139 __fsnotify_inode_delete(inode);
94 */
95static inline void fsnotify_link_count(struct inode *inode)
96{
97 inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL);
98} 140}
99 141
100/* 142/*
@@ -102,10 +144,11 @@ static inline void fsnotify_link_count(struct inode *inode)
102 */ 144 */
103static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) 145static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
104{ 146{
105 inode_dir_notify(inode, DN_CREATE);
106 inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, 147 inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name,
107 dentry->d_inode); 148 dentry->d_inode);
108 audit_inode_child(dentry->d_name.name, dentry, inode); 149 audit_inode_child(dentry->d_name.name, dentry, inode);
150
151 fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
109} 152}
110 153
111/* 154/*
@@ -115,11 +158,12 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry)
115 */ 158 */
116static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) 159static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry)
117{ 160{
118 inode_dir_notify(dir, DN_CREATE);
119 inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name, 161 inotify_inode_queue_event(dir, IN_CREATE, 0, new_dentry->d_name.name,
120 inode); 162 inode);
121 fsnotify_link_count(inode); 163 fsnotify_link_count(inode);
122 audit_inode_child(new_dentry->d_name.name, new_dentry, dir); 164 audit_inode_child(new_dentry->d_name.name, new_dentry, dir);
165
166 fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE, new_dentry->d_name.name, 0);
123} 167}
124 168
125/* 169/*
@@ -127,10 +171,13 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct
127 */ 171 */
128static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) 172static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
129{ 173{
130 inode_dir_notify(inode, DN_CREATE); 174 __u32 mask = (FS_CREATE | FS_IN_ISDIR);
131 inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, 175 struct inode *d_inode = dentry->d_inode;
132 dentry->d_name.name, dentry->d_inode); 176
177 inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode);
133 audit_inode_child(dentry->d_name.name, dentry, inode); 178 audit_inode_child(dentry->d_name.name, dentry, inode);
179
180 fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE, dentry->d_name.name, 0);
134} 181}
135 182
136/* 183/*
@@ -139,14 +186,15 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry)
139static inline void fsnotify_access(struct dentry *dentry) 186static inline void fsnotify_access(struct dentry *dentry)
140{ 187{
141 struct inode *inode = dentry->d_inode; 188 struct inode *inode = dentry->d_inode;
142 u32 mask = IN_ACCESS; 189 __u32 mask = FS_ACCESS;
143 190
144 if (S_ISDIR(inode->i_mode)) 191 if (S_ISDIR(inode->i_mode))
145 mask |= IN_ISDIR; 192 mask |= FS_IN_ISDIR;
146 193
147 dnotify_parent(dentry, DN_ACCESS);
148 inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
149 inotify_inode_queue_event(inode, mask, 0, NULL, NULL); 194 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
195
196 fsnotify_parent(dentry, mask);
197 fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
150} 198}
151 199
152/* 200/*
@@ -155,14 +203,15 @@ static inline void fsnotify_access(struct dentry *dentry)
155static inline void fsnotify_modify(struct dentry *dentry) 203static inline void fsnotify_modify(struct dentry *dentry)
156{ 204{
157 struct inode *inode = dentry->d_inode; 205 struct inode *inode = dentry->d_inode;
158 u32 mask = IN_MODIFY; 206 __u32 mask = FS_MODIFY;
159 207
160 if (S_ISDIR(inode->i_mode)) 208 if (S_ISDIR(inode->i_mode))
161 mask |= IN_ISDIR; 209 mask |= FS_IN_ISDIR;
162 210
163 dnotify_parent(dentry, DN_MODIFY);
164 inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
165 inotify_inode_queue_event(inode, mask, 0, NULL, NULL); 211 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
212
213 fsnotify_parent(dentry, mask);
214 fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
166} 215}
167 216
168/* 217/*
@@ -171,13 +220,15 @@ static inline void fsnotify_modify(struct dentry *dentry)
171static inline void fsnotify_open(struct dentry *dentry) 220static inline void fsnotify_open(struct dentry *dentry)
172{ 221{
173 struct inode *inode = dentry->d_inode; 222 struct inode *inode = dentry->d_inode;
174 u32 mask = IN_OPEN; 223 __u32 mask = FS_OPEN;
175 224
176 if (S_ISDIR(inode->i_mode)) 225 if (S_ISDIR(inode->i_mode))
177 mask |= IN_ISDIR; 226 mask |= FS_IN_ISDIR;
178 227
179 inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
180 inotify_inode_queue_event(inode, mask, 0, NULL, NULL); 228 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
229
230 fsnotify_parent(dentry, mask);
231 fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
181} 232}
182 233
183/* 234/*
@@ -187,15 +238,16 @@ static inline void fsnotify_close(struct file *file)
187{ 238{
188 struct dentry *dentry = file->f_path.dentry; 239 struct dentry *dentry = file->f_path.dentry;
189 struct inode *inode = dentry->d_inode; 240 struct inode *inode = dentry->d_inode;
190 const char *name = dentry->d_name.name;
191 fmode_t mode = file->f_mode; 241 fmode_t mode = file->f_mode;
192 u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; 242 __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE;
193 243
194 if (S_ISDIR(inode->i_mode)) 244 if (S_ISDIR(inode->i_mode))
195 mask |= IN_ISDIR; 245 mask |= FS_IN_ISDIR;
196 246
197 inotify_dentry_parent_queue_event(dentry, mask, 0, name);
198 inotify_inode_queue_event(inode, mask, 0, NULL, NULL); 247 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
248
249 fsnotify_parent(dentry, mask);
250 fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE, NULL, 0);
199} 251}
200 252
201/* 253/*
@@ -204,13 +256,15 @@ static inline void fsnotify_close(struct file *file)
204static inline void fsnotify_xattr(struct dentry *dentry) 256static inline void fsnotify_xattr(struct dentry *dentry)
205{ 257{
206 struct inode *inode = dentry->d_inode; 258 struct inode *inode = dentry->d_inode;
207 u32 mask = IN_ATTRIB; 259 __u32 mask = FS_ATTRIB;
208 260
209 if (S_ISDIR(inode->i_mode)) 261 if (S_ISDIR(inode->i_mode))
210 mask |= IN_ISDIR; 262 mask |= FS_IN_ISDIR;
211 263
212 inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name);
213 inotify_inode_queue_event(inode, mask, 0, NULL, NULL); 264 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
265
266 fsnotify_parent(dentry, mask);
267 fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
214} 268}
215 269
216/* 270/*
@@ -220,50 +274,37 @@ static inline void fsnotify_xattr(struct dentry *dentry)
220static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) 274static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid)
221{ 275{
222 struct inode *inode = dentry->d_inode; 276 struct inode *inode = dentry->d_inode;
223 int dn_mask = 0; 277 __u32 mask = 0;
224 u32 in_mask = 0; 278
279 if (ia_valid & ATTR_UID)
280 mask |= FS_ATTRIB;
281 if (ia_valid & ATTR_GID)
282 mask |= FS_ATTRIB;
283 if (ia_valid & ATTR_SIZE)
284 mask |= FS_MODIFY;
225 285
226 if (ia_valid & ATTR_UID) {
227 in_mask |= IN_ATTRIB;
228 dn_mask |= DN_ATTRIB;
229 }
230 if (ia_valid & ATTR_GID) {
231 in_mask |= IN_ATTRIB;
232 dn_mask |= DN_ATTRIB;
233 }
234 if (ia_valid & ATTR_SIZE) {
235 in_mask |= IN_MODIFY;
236 dn_mask |= DN_MODIFY;
237 }
238 /* both times implies a utime(s) call */ 286 /* both times implies a utime(s) call */
239 if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) 287 if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME))
240 { 288 mask |= FS_ATTRIB;
241 in_mask |= IN_ATTRIB; 289 else if (ia_valid & ATTR_ATIME)
242 dn_mask |= DN_ATTRIB; 290 mask |= FS_ACCESS;
243 } else if (ia_valid & ATTR_ATIME) { 291 else if (ia_valid & ATTR_MTIME)
244 in_mask |= IN_ACCESS; 292 mask |= FS_MODIFY;
245 dn_mask |= DN_ACCESS; 293
246 } else if (ia_valid & ATTR_MTIME) { 294 if (ia_valid & ATTR_MODE)
247 in_mask |= IN_MODIFY; 295 mask |= FS_ATTRIB;
248 dn_mask |= DN_MODIFY;
249 }
250 if (ia_valid & ATTR_MODE) {
251 in_mask |= IN_ATTRIB;
252 dn_mask |= DN_ATTRIB;
253 }
254 296
255 if (dn_mask) 297 if (mask) {
256 dnotify_parent(dentry, dn_mask);
257 if (in_mask) {
258 if (S_ISDIR(inode->i_mode)) 298 if (S_ISDIR(inode->i_mode))
259 in_mask |= IN_ISDIR; 299 mask |= FS_IN_ISDIR;
260 inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); 300 inotify_inode_queue_event(inode, mask, 0, NULL, NULL);
261 inotify_dentry_parent_queue_event(dentry, in_mask, 0, 301
262 dentry->d_name.name); 302 fsnotify_parent(dentry, mask);
303 fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
263 } 304 }
264} 305}
265 306
266#ifdef CONFIG_INOTIFY /* inotify helpers */ 307#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */
267 308
268/* 309/*
269 * fsnotify_oldname_init - save off the old filename before we change it 310 * fsnotify_oldname_init - save off the old filename before we change it
@@ -281,7 +322,7 @@ static inline void fsnotify_oldname_free(const char *old_name)
281 kfree(old_name); 322 kfree(old_name);
282} 323}
283 324
284#else /* CONFIG_INOTIFY */ 325#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */
285 326
286static inline const char *fsnotify_oldname_init(const char *name) 327static inline const char *fsnotify_oldname_init(const char *name)
287{ 328{
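
Every helper in this file now follows the same dual-delivery pattern: queue to the legacy inotify path where applicable, then call fsnotify() with the equivalent FS_* mask. A hypothetical new hook written against this pattern would look like the sketch below (fsnotify_frob and its choice of FS_ATTRIB are illustrative):

static inline void fsnotify_frob(struct dentry *dentry)
{
	struct inode *inode = dentry->d_inode;
	__u32 mask = FS_ATTRIB;		/* illustrative event type */

	if (S_ISDIR(inode->i_mode))
		mask |= FS_IN_ISDIR;

	fsnotify_parent(dentry, mask);
	fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE, NULL, 0);
}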
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
new file mode 100644
index 000000000000..44848aa830dc
--- /dev/null
+++ b/include/linux/fsnotify_backend.h
@@ -0,0 +1,387 @@
1/*
2 * Filesystem access notification for Linux
3 *
4 * Copyright (C) 2008 Red Hat, Inc., Eric Paris <eparis@redhat.com>
5 */
6
7#ifndef __LINUX_FSNOTIFY_BACKEND_H
8#define __LINUX_FSNOTIFY_BACKEND_H
9
10#ifdef __KERNEL__
11
12#include <linux/idr.h> /* inotify uses this */
13#include <linux/fs.h> /* struct inode */
14#include <linux/list.h>
15#include <linux/path.h> /* struct path */
16#include <linux/spinlock.h>
17#include <linux/types.h>
18
19#include <asm/atomic.h>
20
21/*
 22 * IN_* from inotify.h lines up EXACTLY with FS_*, so we can easily
 23 * convert between them. dnotify only needs conversion at watch creation,
 24 * so there is no perf loss there. fanotify isn't defined yet, so it can use
 25 * the holes if it needs more events.
26 */
27#define FS_ACCESS 0x00000001 /* File was accessed */
28#define FS_MODIFY 0x00000002 /* File was modified */
29#define FS_ATTRIB 0x00000004 /* Metadata changed */
 30#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */
 31#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */
32#define FS_OPEN 0x00000020 /* File was opened */
33#define FS_MOVED_FROM 0x00000040 /* File was moved from X */
34#define FS_MOVED_TO 0x00000080 /* File was moved to Y */
35#define FS_CREATE 0x00000100 /* Subfile was created */
36#define FS_DELETE 0x00000200 /* Subfile was deleted */
37#define FS_DELETE_SELF 0x00000400 /* Self was deleted */
38#define FS_MOVE_SELF 0x00000800 /* Self was moved */
39
40#define FS_UNMOUNT 0x00002000 /* inode on umount fs */
41#define FS_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
42#define FS_IN_IGNORED 0x00008000 /* last inotify event here */
43
44#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */
45#define FS_IN_ONESHOT 0x80000000 /* only send event once */
46
47#define FS_DN_RENAME 0x10000000 /* file renamed */
48#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */
49
50/* This inode cares about things that happen to its children. Always set for
51 * dnotify and inotify. */
52#define FS_EVENT_ON_CHILD 0x08000000
53
 54/* This is a list of all events that may get sent to a parent based on fs events
 55 * happening to inodes inside that directory */
56#define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
57 FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
58 FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
59 FS_DELETE)
60
61/* listeners that hard code group numbers near the top */
62#define DNOTIFY_GROUP_NUM UINT_MAX
63#define INOTIFY_GROUP_NUM (DNOTIFY_GROUP_NUM-1)
64
65struct fsnotify_group;
66struct fsnotify_event;
67struct fsnotify_mark_entry;
68struct fsnotify_event_private_data;
69
70/*
 71 * Each group must define these ops. The fsnotify infrastructure will call
72 * these operations for each relevant group.
73 *
74 * should_send_event - given a group, inode, and mask this function determines
75 * if the group is interested in this event.
76 * handle_event - main call for a group to handle an fs event
77 * free_group_priv - called when a group refcnt hits 0 to clean up the private union
 78 * freeing_mark - this means that a mark has been flagged to die when everything
79 * finishes using it. The function is supplied with what must be a
80 * valid group and inode to use to clean up.
81 */
82struct fsnotify_ops {
83 bool (*should_send_event)(struct fsnotify_group *group, struct inode *inode, __u32 mask);
84 int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event);
85 void (*free_group_priv)(struct fsnotify_group *group);
86 void (*freeing_mark)(struct fsnotify_mark_entry *entry, struct fsnotify_group *group);
87 void (*free_event_priv)(struct fsnotify_event_private_data *priv);
88};
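
A sketch of how a backend might fill in these ops, using the mark-lookup and queueing helpers declared later in this header; all example_* names are hypothetical and the policy shown is one plausible choice:

static bool example_should_send_event(struct fsnotify_group *group,
				      struct inode *inode, __u32 mask)
{
	struct fsnotify_mark_entry *entry;
	bool send;

	entry = fsnotify_find_mark_entry(group, inode);
	if (!entry)
		return false;
	send = (entry->mask & mask);
	fsnotify_put_mark(entry);	/* drop the reference find took */
	return send;
}

static int example_handle_event(struct fsnotify_group *group,
				struct fsnotify_event *event)
{
	/* hand the event to this group's notification queue */
	return fsnotify_add_notify_event(group, event, NULL);
}

static const struct fsnotify_ops example_ops = {
	.should_send_event	= example_should_send_event,
	.handle_event		= example_handle_event,
};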
89
90/*
91 * A group is a "thing" that wants to receive notification about filesystem
92 * events. The mask holds the subset of event types this group cares about.
 93 * refcnt on a group is up to the implementor; at any moment, if it hits 0,
94 * everything will be cleaned up.
95 */
96struct fsnotify_group {
97 /*
98 * global list of all groups receiving events from fsnotify.
99 * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex
100 * or fsnotify_grp_srcu depending on write vs read.
101 */
102 struct list_head group_list;
103
104 /*
105 * Defines all of the event types in which this group is interested.
106 * This mask is a bitwise OR of the FS_* events from above. Each time
107 * this mask changes for a group (if it changes) the correct functions
108 * must be called to update the global structures which indicate global
109 * interest in event types.
110 */
111 __u32 mask;
112
113 /*
114 * How the refcnt is used is up to each group. When the refcnt hits 0
115 * fsnotify will clean up all of the resources associated with this group.
116 * As an example, the dnotify group will always have a refcnt=1 and that
117 * will never change. Inotify, on the other hand, has a group per
118 * inotify_init() and the refcnt will hit 0 only when that fd has been
119 * closed.
120 */
121 atomic_t refcnt; /* things with interest in this group */
122 unsigned int group_num; /* simply prevents accidental group collision */
123
124 const struct fsnotify_ops *ops; /* how this group handles things */
125
126 /* needed to send notification to userspace */
127 struct mutex notification_mutex; /* protect the notification_list */
128 struct list_head notification_list; /* list of event_holder this group needs to send to userspace */
129 wait_queue_head_t notification_waitq; /* read() on the notification file blocks on this waitq */
130 unsigned int q_len; /* events on the queue */
131 unsigned int max_events; /* maximum events allowed on the list */
132
 133 /* stores all fastpath entries associated with this group so they can be cleaned up on unregister */
134 spinlock_t mark_lock; /* protect mark_entries list */
135 atomic_t num_marks; /* 1 for each mark entry and 1 for not being
136 * past the point of no return when freeing
137 * a group */
138 struct list_head mark_entries; /* all inode mark entries for this group */
139
140 /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */
141 bool on_group_list;
142
143 /* groups can define private fields here or use the void *private */
144 union {
145 void *private;
146#ifdef CONFIG_INOTIFY_USER
147 struct inotify_group_private_data {
148 spinlock_t idr_lock;
149 struct idr idr;
150 u32 last_wd;
151 struct fasync_struct *fa; /* async notification */
152 struct user_struct *user;
153 } inotify_data;
154#endif
155 };
156};
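
Obtaining the group is a one-time init step for a backend. A sketch, assuming fsnotify_obtain_group() reports failure ERR_PTR-style; EXAMPLE_GROUP_NUM and the example_* names are hypothetical, with the group number chosen just below the hard-coded ones above:

#define EXAMPLE_GROUP_NUM	(INOTIFY_GROUP_NUM - 1)

static struct fsnotify_group *example_group;

static int __init example_backend_init(void)
{
	example_group = fsnotify_obtain_group(EXAMPLE_GROUP_NUM,
					      FS_CREATE | FS_DELETE,
					      &example_ops);
	if (IS_ERR(example_group))
		return PTR_ERR(example_group);
	return 0;
}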
157
158/*
159 * A single event can be queued in multiple group->notification_lists.
160 *
 161 * Each group->notification_list will point to an event_holder which in turn points
162 * to the actual event that needs to be sent to userspace.
163 *
164 * Seemed cheaper to create a refcnt'd event and a small holder for every group
 165 * than to create a different event for every group.
166 *
167 */
168struct fsnotify_event_holder {
169 struct fsnotify_event *event;
170 struct list_head event_list;
171};
172
173/*
174 * Inotify needs to tack data onto an event. This struct lets us later find the
175 * correct private data of the correct group.
176 */
177struct fsnotify_event_private_data {
178 struct fsnotify_group *group;
179 struct list_head event_list;
180};
181
182/*
 183 * all of the information about the original object we now want to send to
 184 * a group. If you want to carry more info from the accessing task to the
 185 * listener, this structure is where you need to add fields.
186 */
187struct fsnotify_event {
188 /*
189 * If we create an event we are also likely going to need a holder
190 * to link to a group. So embed one holder in the event. Means only
191 * one allocation for the common case where we only have one group
192 */
193 struct fsnotify_event_holder holder;
194 spinlock_t lock; /* protection for the associated event_holder and private_list */
195 /* to_tell may ONLY be dereferenced during handle_event(). */
196 struct inode *to_tell; /* either the inode the event happened to or its parent */
197 /*
198 * depending on the event type we should have either a path or inode
199 * We hold a reference on path, but NOT on inode. Since we have the ref on
200 * the path, it may be dereferenced at any point during this object's
201 * lifetime. That reference is dropped when this object's refcnt hits
202 * 0. If this event contains an inode instead of a path, the inode may
203 * ONLY be used during handle_event().
204 */
205 union {
206 struct path path;
207 struct inode *inode;
208 };
209/* when calling fsnotify tell it if the data is a path or inode */
210#define FSNOTIFY_EVENT_NONE 0
211#define FSNOTIFY_EVENT_PATH 1
212#define FSNOTIFY_EVENT_INODE 2
213#define FSNOTIFY_EVENT_FILE 3
214 int data_type; /* which of the above union we have */
215 atomic_t refcnt; /* how many groups still are using/need to send this event */
216 __u32 mask; /* the type of access, bitwise OR for FS_* event types */
217
 218 u32 sync_cookie; /* used to correlate events, namely inotify mv events */
219 char *file_name;
220 size_t name_len;
221
222 struct list_head private_data_list; /* groups can store private data here */
223};
224
225/*
226 * a mark is simply an entry attached to an in core inode which allows an
 227 * fsnotify listener to indicate it is either no longer interested in events
 228 * of a type matching the mask or only interested in those events.
229 *
230 * these are flushed when an inode is evicted from core and may be flushed
231 * when the inode is modified (as seen by fsnotify_access). Some fsnotify users
232 * (such as dnotify) will flush these when the open fd is closed and not at
233 * inode eviction or modification.
234 */
235struct fsnotify_mark_entry {
236 __u32 mask; /* mask this mark entry is for */
237 /* we hold ref for each i_list and g_list. also one ref for each 'thing'
238 * in kernel that found and may be using this mark. */
239 atomic_t refcnt; /* active things looking at this mark */
240 struct inode *inode; /* inode this entry is associated with */
241 struct fsnotify_group *group; /* group this mark entry is for */
242 struct hlist_node i_list; /* list of mark_entries by inode->i_fsnotify_mark_entries */
243 struct list_head g_list; /* list of mark_entries by group->i_fsnotify_mark_entries */
244 spinlock_t lock; /* protect group, inode, and killme */
245 struct list_head free_i_list; /* tmp list used when freeing this mark */
246 struct list_head free_g_list; /* tmp list used when freeing this mark */
247 void (*free_mark)(struct fsnotify_mark_entry *entry); /* called on final put+free */
248};
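
Attaching a mark is what wires a group to a particular inode. A sketch built from the helpers declared below; the kmalloc'd entry and example_* names are hypothetical:

static void example_free_mark(struct fsnotify_mark_entry *entry)
{
	kfree(entry);	/* called once the final reference is dropped */
}

static int example_watch_inode(struct fsnotify_group *group,
			       struct inode *inode, __u32 mask)
{
	struct fsnotify_mark_entry *entry;
	int ret;

	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	fsnotify_init_mark(entry, example_free_mark);
	entry->mask = mask;

	ret = fsnotify_add_mark(entry, group, inode);
	if (!ret)
		fsnotify_recalc_group_mask(group);	/* group mask may have grown */
	fsnotify_put_mark(entry);	/* i_list and g_list hold their own refs */
	return ret;
}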
249
250#ifdef CONFIG_FSNOTIFY
251
252/* called from the vfs helpers */
253
254/* main fsnotify call to send events */
255extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
256 const char *name, u32 cookie);
257extern void __fsnotify_parent(struct dentry *dentry, __u32 mask);
258extern void __fsnotify_inode_delete(struct inode *inode);
259extern u32 fsnotify_get_cookie(void);
260
261static inline int fsnotify_inode_watches_children(struct inode *inode)
262{
263 /* FS_EVENT_ON_CHILD is set if the inode may care */
264 if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD))
265 return 0;
266 /* this inode might care about child events, does it care about the
267 * specific set of events that can happen on a child? */
268 return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD;
269}
270
271/*
 272 * Update the dentry with a flag indicating its parent's interest in receiving
 273 * filesystem events that happen to this dentry->d_inode.
274 */
275static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
276{
277 struct dentry *parent;
278
279 assert_spin_locked(&dcache_lock);
280 assert_spin_locked(&dentry->d_lock);
281
282 parent = dentry->d_parent;
283 if (fsnotify_inode_watches_children(parent->d_inode))
284 dentry->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED;
285 else
286 dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED;
287}
288
289/*
290 * fsnotify_d_instantiate - instantiate a dentry for inode
291 * Called with dcache_lock held.
292 */
293static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
294{
295 if (!inode)
296 return;
297
298 assert_spin_locked(&dcache_lock);
299
300 spin_lock(&dentry->d_lock);
301 __fsnotify_update_dcache_flags(dentry);
302 spin_unlock(&dentry->d_lock);
303}
304
305/* called from fsnotify listeners, such as fanotify or dnotify */
306
307/* must call when a group changes its ->mask */
308extern void fsnotify_recalc_global_mask(void);
309/* get a reference to an existing or create a new group */
310extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num,
311 __u32 mask,
312 const struct fsnotify_ops *ops);
313/* run all marks associated with this group and update group->mask */
314extern void fsnotify_recalc_group_mask(struct fsnotify_group *group);
315/* drop reference on a group from fsnotify_obtain_group */
316extern void fsnotify_put_group(struct fsnotify_group *group);
317
318/* take a reference to an event */
319extern void fsnotify_get_event(struct fsnotify_event *event);
320extern void fsnotify_put_event(struct fsnotify_event *event);
321/* find private data previously attached to an event and unlink it */
322extern struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group,
323 struct fsnotify_event *event);
324
325/* attach the event to the group notification queue */
326extern int fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
327 struct fsnotify_event_private_data *priv);
328/* true if the group notification queue is empty */
329extern bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group);
330/* return, but do not dequeue the first event on the notification queue */
331extern struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group);
332/* return AND dequeue the first event on the notification queue */
333extern struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group);
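
Consumers are expected to drain the queue under notification_mutex, the same discipline fsnotify_flush_notify() uses. A sketch of a read()-style drain loop, with the copy-to-userspace step elided:

static void example_drain_queue(struct fsnotify_group *group)
{
	struct fsnotify_event *event;

	mutex_lock(&group->notification_mutex);
	while (!fsnotify_notify_queue_is_empty(group)) {
		event = fsnotify_remove_notify_event(group);
		/* ... copy event->mask / event->file_name out here ... */
		fsnotify_put_event(event);	/* matches the queue's reference */
	}
	mutex_unlock(&group->notification_mutex);
}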
334
335/* functions used to manipulate the marks attached to inodes */
336
337/* run all marks associated with an inode and update inode->i_fsnotify_mask */
338extern void fsnotify_recalc_inode_mask(struct inode *inode);
339extern void fsnotify_init_mark(struct fsnotify_mark_entry *entry, void (*free_mark)(struct fsnotify_mark_entry *entry));
340/* find (and take a reference) to a mark associated with group and inode */
341extern struct fsnotify_mark_entry *fsnotify_find_mark_entry(struct fsnotify_group *group, struct inode *inode);
342/* attach the mark to both the group and the inode */
343extern int fsnotify_add_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group, struct inode *inode);
344/* given a mark, flag it to be freed when all references are dropped */
345extern void fsnotify_destroy_mark_by_entry(struct fsnotify_mark_entry *entry);
346/* run all the marks in a group, and flag them to be freed */
347extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group);
348extern void fsnotify_get_mark(struct fsnotify_mark_entry *entry);
349extern void fsnotify_put_mark(struct fsnotify_mark_entry *entry);
350extern void fsnotify_unmount_inodes(struct list_head *list);
351
352/* put here because inotify does some weird stuff when destroying watches */
353extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
354 void *data, int data_is, const char *name,
355 u32 cookie);
356
357#else
358
359static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
360 const char *name, u32 cookie)
361{}
362
363static inline void __fsnotify_parent(struct dentry *dentry, __u32 mask)
364{}
365
366static inline void __fsnotify_inode_delete(struct inode *inode)
367{}
368
369static inline void __fsnotify_update_dcache_flags(struct dentry *dentry)
370{}
371
372static inline void __fsnotify_d_instantiate(struct dentry *dentry, struct inode *inode)
373{}
374
375static inline u32 fsnotify_get_cookie(void)
376{
377 return 0;
378}
379
380static inline void fsnotify_unmount_inodes(struct list_head *list)
381{}
382
383#endif /* CONFIG_FSNOTIFY */
384
 385#endif /* __KERNEL__ */
386
387#endif /* __LINUX_FSNOTIFY_BACKEND_H */
diff --git a/init/Kconfig b/init/Kconfig
index 9b68fee8d79e..c649657e2259 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -302,7 +302,8 @@ config AUDITSYSCALL
302 302
303config AUDIT_TREE 303config AUDIT_TREE
304 def_bool y 304 def_bool y
305 depends on AUDITSYSCALL && INOTIFY 305 depends on AUDITSYSCALL
306 select INOTIFY
306 307
307menu "RCU Subsystem" 308menu "RCU Subsystem"
308 309