aboutsummaryrefslogtreecommitdiffstats
path: root/fs/notify/inotify
diff options
context:
space:
mode:
Diffstat (limited to 'fs/notify/inotify')
-rw-r--r--fs/notify/inotify/Kconfig15
-rw-r--r--fs/notify/inotify/Makefile1
-rw-r--r--fs/notify/inotify/inotify.c872
-rw-r--r--fs/notify/inotify/inotify.h7
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c151
-rw-r--r--fs/notify/inotify/inotify_user.c369
6 files changed, 338 insertions, 1077 deletions
diff --git a/fs/notify/inotify/Kconfig b/fs/notify/inotify/Kconfig
index b3a159b21cfd..b981fc0c8379 100644
--- a/fs/notify/inotify/Kconfig
+++ b/fs/notify/inotify/Kconfig
@@ -1,18 +1,3 @@
1config INOTIFY
2 bool "Inotify file change notification support"
3 default n
4 ---help---
5 Say Y here to enable legacy in kernel inotify support. Inotify is a
6 file change notification system. It is a replacement for dnotify.
7 This option only provides the legacy inotify in kernel API. There
8 are no in tree kernel users of this interface since it is deprecated.
9 You only need this if you are loading an out of tree kernel module
10 that uses inotify.
11
12 For more information, see <file:Documentation/filesystems/inotify.txt>
13
14 If unsure, say N.
15
16config INOTIFY_USER 1config INOTIFY_USER
17 bool "Inotify support for userspace" 2 bool "Inotify support for userspace"
18 select ANON_INODES 3 select ANON_INODES
diff --git a/fs/notify/inotify/Makefile b/fs/notify/inotify/Makefile
index 943828171362..a380dabe09de 100644
--- a/fs/notify/inotify/Makefile
+++ b/fs/notify/inotify/Makefile
@@ -1,2 +1 @@
1obj-$(CONFIG_INOTIFY) += inotify.o
2obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o obj-$(CONFIG_INOTIFY_USER) += inotify_fsnotify.o inotify_user.o
diff --git a/fs/notify/inotify/inotify.c b/fs/notify/inotify/inotify.c
deleted file mode 100644
index cf6b0429a257..000000000000
--- a/fs/notify/inotify/inotify.c
+++ /dev/null
@@ -1,872 +0,0 @@
1/*
2 * fs/inotify.c - inode-based file event notifications
3 *
4 * Authors:
5 * John McCutchan <ttb@tentacle.dhs.org>
6 * Robert Love <rml@novell.com>
7 *
8 * Kernel API added by: Amy Griffis <amy.griffis@hp.com>
9 *
10 * Copyright (C) 2005 John McCutchan
11 * Copyright 2006 Hewlett-Packard Development Company, L.P.
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of the GNU General Public License as published by the
15 * Free Software Foundation; either version 2, or (at your option) any
16 * later version.
17 *
18 * This program is distributed in the hope that it will be useful, but
19 * WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * General Public License for more details.
22 */
23
24#include <linux/module.h>
25#include <linux/kernel.h>
26#include <linux/spinlock.h>
27#include <linux/idr.h>
28#include <linux/slab.h>
29#include <linux/fs.h>
30#include <linux/sched.h>
31#include <linux/init.h>
32#include <linux/list.h>
33#include <linux/writeback.h>
34#include <linux/inotify.h>
35#include <linux/fsnotify_backend.h>
36
37static atomic_t inotify_cookie;
38
39/*
40 * Lock ordering:
41 *
42 * dentry->d_lock (used to keep d_move() away from dentry->d_parent)
43 * iprune_mutex (synchronize shrink_icache_memory())
44 * inode_lock (protects the super_block->s_inodes list)
45 * inode->inotify_mutex (protects inode->inotify_watches and watches->i_list)
46 * inotify_handle->mutex (protects inotify_handle and watches->h_list)
47 *
48 * The inode->inotify_mutex and inotify_handle->mutex and held during execution
49 * of a caller's event handler. Thus, the caller must not hold any locks
50 * taken in their event handler while calling any of the published inotify
51 * interfaces.
52 */
53
54/*
55 * Lifetimes of the three main data structures--inotify_handle, inode, and
56 * inotify_watch--are managed by reference count.
57 *
58 * inotify_handle: Lifetime is from inotify_init() to inotify_destroy().
59 * Additional references can bump the count via get_inotify_handle() and drop
60 * the count via put_inotify_handle().
61 *
62 * inotify_watch: for inotify's purposes, lifetime is from inotify_add_watch()
63 * to remove_watch_no_event(). Additional references can bump the count via
64 * get_inotify_watch() and drop the count via put_inotify_watch(). The caller
65 * is reponsible for the final put after receiving IN_IGNORED, or when using
66 * IN_ONESHOT after receiving the first event. Inotify does the final put if
67 * inotify_destroy() is called.
68 *
69 * inode: Pinned so long as the inode is associated with a watch, from
70 * inotify_add_watch() to the final put_inotify_watch().
71 */
72
73/*
74 * struct inotify_handle - represents an inotify instance
75 *
76 * This structure is protected by the mutex 'mutex'.
77 */
78struct inotify_handle {
79 struct idr idr; /* idr mapping wd -> watch */
80 struct mutex mutex; /* protects this bad boy */
81 struct list_head watches; /* list of watches */
82 atomic_t count; /* reference count */
83 u32 last_wd; /* the last wd allocated */
84 const struct inotify_operations *in_ops; /* inotify caller operations */
85};
86
87static inline void get_inotify_handle(struct inotify_handle *ih)
88{
89 atomic_inc(&ih->count);
90}
91
92static inline void put_inotify_handle(struct inotify_handle *ih)
93{
94 if (atomic_dec_and_test(&ih->count)) {
95 idr_destroy(&ih->idr);
96 kfree(ih);
97 }
98}
99
100/**
101 * get_inotify_watch - grab a reference to an inotify_watch
102 * @watch: watch to grab
103 */
104void get_inotify_watch(struct inotify_watch *watch)
105{
106 atomic_inc(&watch->count);
107}
108EXPORT_SYMBOL_GPL(get_inotify_watch);
109
110int pin_inotify_watch(struct inotify_watch *watch)
111{
112 struct super_block *sb = watch->inode->i_sb;
113 if (atomic_inc_not_zero(&sb->s_active)) {
114 atomic_inc(&watch->count);
115 return 1;
116 }
117 return 0;
118}
119
120/**
121 * put_inotify_watch - decrements the ref count on a given watch. cleans up
122 * watch references if the count reaches zero. inotify_watch is freed by
123 * inotify callers via the destroy_watch() op.
124 * @watch: watch to release
125 */
126void put_inotify_watch(struct inotify_watch *watch)
127{
128 if (atomic_dec_and_test(&watch->count)) {
129 struct inotify_handle *ih = watch->ih;
130
131 iput(watch->inode);
132 ih->in_ops->destroy_watch(watch);
133 put_inotify_handle(ih);
134 }
135}
136EXPORT_SYMBOL_GPL(put_inotify_watch);
137
138void unpin_inotify_watch(struct inotify_watch *watch)
139{
140 struct super_block *sb = watch->inode->i_sb;
141 put_inotify_watch(watch);
142 deactivate_super(sb);
143}
144
145/*
146 * inotify_handle_get_wd - returns the next WD for use by the given handle
147 *
148 * Callers must hold ih->mutex. This function can sleep.
149 */
150static int inotify_handle_get_wd(struct inotify_handle *ih,
151 struct inotify_watch *watch)
152{
153 int ret;
154
155 do {
156 if (unlikely(!idr_pre_get(&ih->idr, GFP_NOFS)))
157 return -ENOSPC;
158 ret = idr_get_new_above(&ih->idr, watch, ih->last_wd+1, &watch->wd);
159 } while (ret == -EAGAIN);
160
161 if (likely(!ret))
162 ih->last_wd = watch->wd;
163
164 return ret;
165}
166
167/*
168 * inotify_inode_watched - returns nonzero if there are watches on this inode
169 * and zero otherwise. We call this lockless, we do not care if we race.
170 */
171static inline int inotify_inode_watched(struct inode *inode)
172{
173 return !list_empty(&inode->inotify_watches);
174}
175
176/*
177 * Get child dentry flag into synch with parent inode.
178 * Flag should always be clear for negative dentrys.
179 */
180static void set_dentry_child_flags(struct inode *inode, int watched)
181{
182 struct dentry *alias;
183
184 spin_lock(&dcache_lock);
185 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
186 struct dentry *child;
187
188 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
189 if (!child->d_inode)
190 continue;
191
192 spin_lock(&child->d_lock);
193 if (watched)
194 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
195 else
196 child->d_flags &=~DCACHE_INOTIFY_PARENT_WATCHED;
197 spin_unlock(&child->d_lock);
198 }
199 }
200 spin_unlock(&dcache_lock);
201}
202
203/*
204 * inotify_find_handle - find the watch associated with the given inode and
205 * handle
206 *
207 * Callers must hold inode->inotify_mutex.
208 */
209static struct inotify_watch *inode_find_handle(struct inode *inode,
210 struct inotify_handle *ih)
211{
212 struct inotify_watch *watch;
213
214 list_for_each_entry(watch, &inode->inotify_watches, i_list) {
215 if (watch->ih == ih)
216 return watch;
217 }
218
219 return NULL;
220}
221
222/*
223 * remove_watch_no_event - remove watch without the IN_IGNORED event.
224 *
225 * Callers must hold both inode->inotify_mutex and ih->mutex.
226 */
227static void remove_watch_no_event(struct inotify_watch *watch,
228 struct inotify_handle *ih)
229{
230 list_del(&watch->i_list);
231 list_del(&watch->h_list);
232
233 if (!inotify_inode_watched(watch->inode))
234 set_dentry_child_flags(watch->inode, 0);
235
236 idr_remove(&ih->idr, watch->wd);
237}
238
239/**
240 * inotify_remove_watch_locked - Remove a watch from both the handle and the
241 * inode. Sends the IN_IGNORED event signifying that the inode is no longer
242 * watched. May be invoked from a caller's event handler.
243 * @ih: inotify handle associated with watch
244 * @watch: watch to remove
245 *
246 * Callers must hold both inode->inotify_mutex and ih->mutex.
247 */
248void inotify_remove_watch_locked(struct inotify_handle *ih,
249 struct inotify_watch *watch)
250{
251 remove_watch_no_event(watch, ih);
252 ih->in_ops->handle_event(watch, watch->wd, IN_IGNORED, 0, NULL, NULL);
253}
254EXPORT_SYMBOL_GPL(inotify_remove_watch_locked);
255
256/* Kernel API for producing events */
257
258/*
259 * inotify_d_instantiate - instantiate dcache entry for inode
260 */
261void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
262{
263 struct dentry *parent;
264
265 if (!inode)
266 return;
267
268 spin_lock(&entry->d_lock);
269 parent = entry->d_parent;
270 if (parent->d_inode && inotify_inode_watched(parent->d_inode))
271 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
272 spin_unlock(&entry->d_lock);
273}
274
275/*
276 * inotify_d_move - dcache entry has been moved
277 */
278void inotify_d_move(struct dentry *entry)
279{
280 struct dentry *parent;
281
282 parent = entry->d_parent;
283 if (inotify_inode_watched(parent->d_inode))
284 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
285 else
286 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
287}
288
289/**
290 * inotify_inode_queue_event - queue an event to all watches on this inode
291 * @inode: inode event is originating from
292 * @mask: event mask describing this event
293 * @cookie: cookie for synchronization, or zero
294 * @name: filename, if any
295 * @n_inode: inode associated with name
296 */
297void inotify_inode_queue_event(struct inode *inode, u32 mask, u32 cookie,
298 const char *name, struct inode *n_inode)
299{
300 struct inotify_watch *watch, *next;
301
302 if (!inotify_inode_watched(inode))
303 return;
304
305 mutex_lock(&inode->inotify_mutex);
306 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
307 u32 watch_mask = watch->mask;
308 if (watch_mask & mask) {
309 struct inotify_handle *ih= watch->ih;
310 mutex_lock(&ih->mutex);
311 if (watch_mask & IN_ONESHOT)
312 remove_watch_no_event(watch, ih);
313 ih->in_ops->handle_event(watch, watch->wd, mask, cookie,
314 name, n_inode);
315 mutex_unlock(&ih->mutex);
316 }
317 }
318 mutex_unlock(&inode->inotify_mutex);
319}
320EXPORT_SYMBOL_GPL(inotify_inode_queue_event);
321
322/**
323 * inotify_dentry_parent_queue_event - queue an event to a dentry's parent
324 * @dentry: the dentry in question, we queue against this dentry's parent
325 * @mask: event mask describing this event
326 * @cookie: cookie for synchronization, or zero
327 * @name: filename, if any
328 */
329void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
330 u32 cookie, const char *name)
331{
332 struct dentry *parent;
333 struct inode *inode;
334
335 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
336 return;
337
338 spin_lock(&dentry->d_lock);
339 parent = dentry->d_parent;
340 inode = parent->d_inode;
341
342 if (inotify_inode_watched(inode)) {
343 dget(parent);
344 spin_unlock(&dentry->d_lock);
345 inotify_inode_queue_event(inode, mask, cookie, name,
346 dentry->d_inode);
347 dput(parent);
348 } else
349 spin_unlock(&dentry->d_lock);
350}
351EXPORT_SYMBOL_GPL(inotify_dentry_parent_queue_event);
352
353/**
354 * inotify_get_cookie - return a unique cookie for use in synchronizing events.
355 */
356u32 inotify_get_cookie(void)
357{
358 return atomic_inc_return(&inotify_cookie);
359}
360EXPORT_SYMBOL_GPL(inotify_get_cookie);
361
362/**
363 * inotify_unmount_inodes - an sb is unmounting. handle any watched inodes.
364 * @list: list of inodes being unmounted (sb->s_inodes)
365 *
366 * Called with inode_lock held, protecting the unmounting super block's list
367 * of inodes, and with iprune_mutex held, keeping shrink_icache_memory() at bay.
368 * We temporarily drop inode_lock, however, and CAN block.
369 */
370void inotify_unmount_inodes(struct list_head *list)
371{
372 struct inode *inode, *next_i, *need_iput = NULL;
373
374 list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
375 struct inotify_watch *watch, *next_w;
376 struct inode *need_iput_tmp;
377 struct list_head *watches;
378
379 /*
380 * We cannot __iget() an inode in state I_FREEING,
381 * I_WILL_FREE, or I_NEW which is fine because by that point
382 * the inode cannot have any associated watches.
383 */
384 if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW))
385 continue;
386
387 /*
388 * If i_count is zero, the inode cannot have any watches and
389 * doing an __iget/iput with MS_ACTIVE clear would actually
390 * evict all inodes with zero i_count from icache which is
391 * unnecessarily violent and may in fact be illegal to do.
392 */
393 if (!atomic_read(&inode->i_count))
394 continue;
395
396 need_iput_tmp = need_iput;
397 need_iput = NULL;
398 /* In case inotify_remove_watch_locked() drops a reference. */
399 if (inode != need_iput_tmp)
400 __iget(inode);
401 else
402 need_iput_tmp = NULL;
403 /* In case the dropping of a reference would nuke next_i. */
404 if ((&next_i->i_sb_list != list) &&
405 atomic_read(&next_i->i_count) &&
406 !(next_i->i_state & (I_FREEING|I_WILL_FREE))) {
407 __iget(next_i);
408 need_iput = next_i;
409 }
410
411 /*
412 * We can safely drop inode_lock here because we hold
413 * references on both inode and next_i. Also no new inodes
414 * will be added since the umount has begun. Finally,
415 * iprune_mutex keeps shrink_icache_memory() away.
416 */
417 spin_unlock(&inode_lock);
418
419 if (need_iput_tmp)
420 iput(need_iput_tmp);
421
422 /* for each watch, send IN_UNMOUNT and then remove it */
423 mutex_lock(&inode->inotify_mutex);
424 watches = &inode->inotify_watches;
425 list_for_each_entry_safe(watch, next_w, watches, i_list) {
426 struct inotify_handle *ih= watch->ih;
427 get_inotify_watch(watch);
428 mutex_lock(&ih->mutex);
429 ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0,
430 NULL, NULL);
431 inotify_remove_watch_locked(ih, watch);
432 mutex_unlock(&ih->mutex);
433 put_inotify_watch(watch);
434 }
435 mutex_unlock(&inode->inotify_mutex);
436 iput(inode);
437
438 spin_lock(&inode_lock);
439 }
440}
441EXPORT_SYMBOL_GPL(inotify_unmount_inodes);
442
443/**
444 * inotify_inode_is_dead - an inode has been deleted, cleanup any watches
445 * @inode: inode that is about to be removed
446 */
447void inotify_inode_is_dead(struct inode *inode)
448{
449 struct inotify_watch *watch, *next;
450
451 mutex_lock(&inode->inotify_mutex);
452 list_for_each_entry_safe(watch, next, &inode->inotify_watches, i_list) {
453 struct inotify_handle *ih = watch->ih;
454 mutex_lock(&ih->mutex);
455 inotify_remove_watch_locked(ih, watch);
456 mutex_unlock(&ih->mutex);
457 }
458 mutex_unlock(&inode->inotify_mutex);
459}
460EXPORT_SYMBOL_GPL(inotify_inode_is_dead);
461
462/* Kernel Consumer API */
463
464/**
465 * inotify_init - allocate and initialize an inotify instance
466 * @ops: caller's inotify operations
467 */
468struct inotify_handle *inotify_init(const struct inotify_operations *ops)
469{
470 struct inotify_handle *ih;
471
472 ih = kmalloc(sizeof(struct inotify_handle), GFP_KERNEL);
473 if (unlikely(!ih))
474 return ERR_PTR(-ENOMEM);
475
476 idr_init(&ih->idr);
477 INIT_LIST_HEAD(&ih->watches);
478 mutex_init(&ih->mutex);
479 ih->last_wd = 0;
480 ih->in_ops = ops;
481 atomic_set(&ih->count, 0);
482 get_inotify_handle(ih);
483
484 return ih;
485}
486EXPORT_SYMBOL_GPL(inotify_init);
487
488/**
489 * inotify_init_watch - initialize an inotify watch
490 * @watch: watch to initialize
491 */
492void inotify_init_watch(struct inotify_watch *watch)
493{
494 INIT_LIST_HEAD(&watch->h_list);
495 INIT_LIST_HEAD(&watch->i_list);
496 atomic_set(&watch->count, 0);
497 get_inotify_watch(watch); /* initial get */
498}
499EXPORT_SYMBOL_GPL(inotify_init_watch);
500
501/*
502 * Watch removals suck violently. To kick the watch out we need (in this
503 * order) inode->inotify_mutex and ih->mutex. That's fine if we have
504 * a hold on inode; however, for all other cases we need to make damn sure
505 * we don't race with umount. We can *NOT* just grab a reference to a
506 * watch - inotify_unmount_inodes() will happily sail past it and we'll end
507 * with reference to inode potentially outliving its superblock. Ideally
508 * we just want to grab an active reference to superblock if we can; that
509 * will make sure we won't go into inotify_umount_inodes() until we are
510 * done. Cleanup is just deactivate_super(). However, that leaves a messy
511 * case - what if we *are* racing with umount() and active references to
512 * superblock can't be acquired anymore? We can bump ->s_count, grab
513 * ->s_umount, which will wait until the superblock is shut down and the
514 * watch in question is pining for fjords.
515 *
516 * And yes, this is far beyond mere "not very pretty"; so's the entire
517 * concept of inotify to start with.
518 */
519
520/**
521 * pin_to_kill - pin the watch down for removal
522 * @ih: inotify handle
523 * @watch: watch to kill
524 *
525 * Called with ih->mutex held, drops it. Possible return values:
526 * 0 - nothing to do, it has died
527 * 1 - remove it, drop the reference and deactivate_super()
528 */
529static int pin_to_kill(struct inotify_handle *ih, struct inotify_watch *watch)
530{
531 struct super_block *sb = watch->inode->i_sb;
532
533 if (atomic_inc_not_zero(&sb->s_active)) {
534 get_inotify_watch(watch);
535 mutex_unlock(&ih->mutex);
536 return 1; /* the best outcome */
537 }
538 spin_lock(&sb_lock);
539 sb->s_count++;
540 spin_unlock(&sb_lock);
541 mutex_unlock(&ih->mutex); /* can't grab ->s_umount under it */
542 down_read(&sb->s_umount);
543 /* fs is already shut down; the watch is dead */
544 drop_super(sb);
545 return 0;
546}
547
548static void unpin_and_kill(struct inotify_watch *watch)
549{
550 struct super_block *sb = watch->inode->i_sb;
551 put_inotify_watch(watch);
552 deactivate_super(sb);
553}
554
555/**
556 * inotify_destroy - clean up and destroy an inotify instance
557 * @ih: inotify handle
558 */
559void inotify_destroy(struct inotify_handle *ih)
560{
561 /*
562 * Destroy all of the watches for this handle. Unfortunately, not very
563 * pretty. We cannot do a simple iteration over the list, because we
564 * do not know the inode until we iterate to the watch. But we need to
565 * hold inode->inotify_mutex before ih->mutex. The following works.
566 *
567 * AV: it had to become even uglier to start working ;-/
568 */
569 while (1) {
570 struct inotify_watch *watch;
571 struct list_head *watches;
572 struct super_block *sb;
573 struct inode *inode;
574
575 mutex_lock(&ih->mutex);
576 watches = &ih->watches;
577 if (list_empty(watches)) {
578 mutex_unlock(&ih->mutex);
579 break;
580 }
581 watch = list_first_entry(watches, struct inotify_watch, h_list);
582 sb = watch->inode->i_sb;
583 if (!pin_to_kill(ih, watch))
584 continue;
585
586 inode = watch->inode;
587 mutex_lock(&inode->inotify_mutex);
588 mutex_lock(&ih->mutex);
589
590 /* make sure we didn't race with another list removal */
591 if (likely(idr_find(&ih->idr, watch->wd))) {
592 remove_watch_no_event(watch, ih);
593 put_inotify_watch(watch);
594 }
595
596 mutex_unlock(&ih->mutex);
597 mutex_unlock(&inode->inotify_mutex);
598 unpin_and_kill(watch);
599 }
600
601 /* free this handle: the put matching the get in inotify_init() */
602 put_inotify_handle(ih);
603}
604EXPORT_SYMBOL_GPL(inotify_destroy);
605
606/**
607 * inotify_find_watch - find an existing watch for an (ih,inode) pair
608 * @ih: inotify handle
609 * @inode: inode to watch
610 * @watchp: pointer to existing inotify_watch
611 *
612 * Caller must pin given inode (via nameidata).
613 */
614s32 inotify_find_watch(struct inotify_handle *ih, struct inode *inode,
615 struct inotify_watch **watchp)
616{
617 struct inotify_watch *old;
618 int ret = -ENOENT;
619
620 mutex_lock(&inode->inotify_mutex);
621 mutex_lock(&ih->mutex);
622
623 old = inode_find_handle(inode, ih);
624 if (unlikely(old)) {
625 get_inotify_watch(old); /* caller must put watch */
626 *watchp = old;
627 ret = old->wd;
628 }
629
630 mutex_unlock(&ih->mutex);
631 mutex_unlock(&inode->inotify_mutex);
632
633 return ret;
634}
635EXPORT_SYMBOL_GPL(inotify_find_watch);
636
637/**
638 * inotify_find_update_watch - find and update the mask of an existing watch
639 * @ih: inotify handle
640 * @inode: inode's watch to update
641 * @mask: mask of events to watch
642 *
643 * Caller must pin given inode (via nameidata).
644 */
645s32 inotify_find_update_watch(struct inotify_handle *ih, struct inode *inode,
646 u32 mask)
647{
648 struct inotify_watch *old;
649 int mask_add = 0;
650 int ret;
651
652 if (mask & IN_MASK_ADD)
653 mask_add = 1;
654
655 /* don't allow invalid bits: we don't want flags set */
656 mask &= IN_ALL_EVENTS | IN_ONESHOT;
657 if (unlikely(!mask))
658 return -EINVAL;
659
660 mutex_lock(&inode->inotify_mutex);
661 mutex_lock(&ih->mutex);
662
663 /*
664 * Handle the case of re-adding a watch on an (inode,ih) pair that we
665 * are already watching. We just update the mask and return its wd.
666 */
667 old = inode_find_handle(inode, ih);
668 if (unlikely(!old)) {
669 ret = -ENOENT;
670 goto out;
671 }
672
673 if (mask_add)
674 old->mask |= mask;
675 else
676 old->mask = mask;
677 ret = old->wd;
678out:
679 mutex_unlock(&ih->mutex);
680 mutex_unlock(&inode->inotify_mutex);
681 return ret;
682}
683EXPORT_SYMBOL_GPL(inotify_find_update_watch);
684
685/**
686 * inotify_add_watch - add a watch to an inotify instance
687 * @ih: inotify handle
688 * @watch: caller allocated watch structure
689 * @inode: inode to watch
690 * @mask: mask of events to watch
691 *
692 * Caller must pin given inode (via nameidata).
693 * Caller must ensure it only calls inotify_add_watch() once per watch.
694 * Calls inotify_handle_get_wd() so may sleep.
695 */
696s32 inotify_add_watch(struct inotify_handle *ih, struct inotify_watch *watch,
697 struct inode *inode, u32 mask)
698{
699 int ret = 0;
700 int newly_watched;
701
702 /* don't allow invalid bits: we don't want flags set */
703 mask &= IN_ALL_EVENTS | IN_ONESHOT;
704 if (unlikely(!mask))
705 return -EINVAL;
706 watch->mask = mask;
707
708 mutex_lock(&inode->inotify_mutex);
709 mutex_lock(&ih->mutex);
710
711 /* Initialize a new watch */
712 ret = inotify_handle_get_wd(ih, watch);
713 if (unlikely(ret))
714 goto out;
715 ret = watch->wd;
716
717 /* save a reference to handle and bump the count to make it official */
718 get_inotify_handle(ih);
719 watch->ih = ih;
720
721 /*
722 * Save a reference to the inode and bump the ref count to make it
723 * official. We hold a reference to nameidata, which makes this safe.
724 */
725 watch->inode = igrab(inode);
726
727 /* Add the watch to the handle's and the inode's list */
728 newly_watched = !inotify_inode_watched(inode);
729 list_add(&watch->h_list, &ih->watches);
730 list_add(&watch->i_list, &inode->inotify_watches);
731 /*
732 * Set child flags _after_ adding the watch, so there is no race
733 * windows where newly instantiated children could miss their parent's
734 * watched flag.
735 */
736 if (newly_watched)
737 set_dentry_child_flags(inode, 1);
738
739out:
740 mutex_unlock(&ih->mutex);
741 mutex_unlock(&inode->inotify_mutex);
742 return ret;
743}
744EXPORT_SYMBOL_GPL(inotify_add_watch);
745
746/**
747 * inotify_clone_watch - put the watch next to existing one
748 * @old: already installed watch
749 * @new: new watch
750 *
751 * Caller must hold the inotify_mutex of inode we are dealing with;
752 * it is expected to remove the old watch before unlocking the inode.
753 */
754s32 inotify_clone_watch(struct inotify_watch *old, struct inotify_watch *new)
755{
756 struct inotify_handle *ih = old->ih;
757 int ret = 0;
758
759 new->mask = old->mask;
760 new->ih = ih;
761
762 mutex_lock(&ih->mutex);
763
764 /* Initialize a new watch */
765 ret = inotify_handle_get_wd(ih, new);
766 if (unlikely(ret))
767 goto out;
768 ret = new->wd;
769
770 get_inotify_handle(ih);
771
772 new->inode = igrab(old->inode);
773
774 list_add(&new->h_list, &ih->watches);
775 list_add(&new->i_list, &old->inode->inotify_watches);
776out:
777 mutex_unlock(&ih->mutex);
778 return ret;
779}
780
781void inotify_evict_watch(struct inotify_watch *watch)
782{
783 get_inotify_watch(watch);
784 mutex_lock(&watch->ih->mutex);
785 inotify_remove_watch_locked(watch->ih, watch);
786 mutex_unlock(&watch->ih->mutex);
787}
788
789/**
790 * inotify_rm_wd - remove a watch from an inotify instance
791 * @ih: inotify handle
792 * @wd: watch descriptor to remove
793 *
794 * Can sleep.
795 */
796int inotify_rm_wd(struct inotify_handle *ih, u32 wd)
797{
798 struct inotify_watch *watch;
799 struct super_block *sb;
800 struct inode *inode;
801
802 mutex_lock(&ih->mutex);
803 watch = idr_find(&ih->idr, wd);
804 if (unlikely(!watch)) {
805 mutex_unlock(&ih->mutex);
806 return -EINVAL;
807 }
808 sb = watch->inode->i_sb;
809 if (!pin_to_kill(ih, watch))
810 return 0;
811
812 inode = watch->inode;
813
814 mutex_lock(&inode->inotify_mutex);
815 mutex_lock(&ih->mutex);
816
817 /* make sure that we did not race */
818 if (likely(idr_find(&ih->idr, wd) == watch))
819 inotify_remove_watch_locked(ih, watch);
820
821 mutex_unlock(&ih->mutex);
822 mutex_unlock(&inode->inotify_mutex);
823 unpin_and_kill(watch);
824
825 return 0;
826}
827EXPORT_SYMBOL_GPL(inotify_rm_wd);
828
829/**
830 * inotify_rm_watch - remove a watch from an inotify instance
831 * @ih: inotify handle
832 * @watch: watch to remove
833 *
834 * Can sleep.
835 */
836int inotify_rm_watch(struct inotify_handle *ih,
837 struct inotify_watch *watch)
838{
839 return inotify_rm_wd(ih, watch->wd);
840}
841EXPORT_SYMBOL_GPL(inotify_rm_watch);
842
843/*
844 * inotify_setup - core initialization function
845 */
846static int __init inotify_setup(void)
847{
848 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
849 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
850 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
851 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
852 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
853 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
854 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
855 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
856 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
857 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
858 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
859 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
860 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
861
862 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
863 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
864 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
865 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
866
867 atomic_set(&inotify_cookie, 0);
868
869 return 0;
870}
871
872module_init(inotify_setup);
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index f234f3a4c8ca..b6642e4de4bf 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -9,13 +9,12 @@ struct inotify_event_private_data {
9 int wd; 9 int wd;
10}; 10};
11 11
12struct inotify_inode_mark_entry { 12struct inotify_inode_mark {
13 /* fsnotify_mark_entry MUST be the first thing */ 13 struct fsnotify_mark fsn_mark;
14 struct fsnotify_mark_entry fsn_entry;
15 int wd; 14 int wd;
16}; 15};
17 16
18extern void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 17extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
19 struct fsnotify_group *group); 18 struct fsnotify_group *group);
20extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv); 19extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
21 20
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index e27960cd76ab..5e73eeb2c697 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -22,6 +22,7 @@
22 * General Public License for more details. 22 * General Public License for more details.
23 */ 23 */
24 24
25#include <linux/dcache.h> /* d_unlinked */
25#include <linux/fs.h> /* struct inode */ 26#include <linux/fs.h> /* struct inode */
26#include <linux/fsnotify_backend.h> 27#include <linux/fsnotify_backend.h>
27#include <linux/inotify.h> 28#include <linux/inotify.h>
@@ -32,26 +33,84 @@
32 33
33#include "inotify.h" 34#include "inotify.h"
34 35
35static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_event *event) 36/*
37 * Check if 2 events contain the same information. We do not compare private data
38 * but at this moment that isn't a problem for any know fsnotify listeners.
39 */
40static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
41{
42 if ((old->mask == new->mask) &&
43 (old->to_tell == new->to_tell) &&
44 (old->data_type == new->data_type) &&
45 (old->name_len == new->name_len)) {
46 switch (old->data_type) {
47 case (FSNOTIFY_EVENT_INODE):
48 /* remember, after old was put on the wait_q we aren't
49 * allowed to look at the inode any more, only thing
50 * left to check was if the file_name is the same */
51 if (!old->name_len ||
52 !strcmp(old->file_name, new->file_name))
53 return true;
54 break;
55 case (FSNOTIFY_EVENT_FILE):
56 if ((old->file->f_path.mnt == new->file->f_path.mnt) &&
57 (old->file->f_path.dentry == new->file->f_path.dentry))
58 return true;
59 break;
60 case (FSNOTIFY_EVENT_NONE):
61 if (old->mask & FS_Q_OVERFLOW)
62 return true;
63 else if (old->mask & FS_IN_IGNORED)
64 return false;
65 return true;
66 };
67 }
68 return false;
69}
70
71static struct fsnotify_event *inotify_merge(struct list_head *list,
72 struct fsnotify_event *event)
36{ 73{
37 struct fsnotify_mark_entry *entry; 74 struct fsnotify_event_holder *last_holder;
38 struct inotify_inode_mark_entry *ientry; 75 struct fsnotify_event *last_event;
76
77 /* and the list better be locked by something too */
78 spin_lock(&event->lock);
79
80 last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
81 last_event = last_holder->event;
82 if (event_compare(last_event, event))
83 fsnotify_get_event(last_event);
84 else
85 last_event = NULL;
86
87 spin_unlock(&event->lock);
88
89 return last_event;
90}
91
92static int inotify_handle_event(struct fsnotify_group *group,
93 struct fsnotify_mark *inode_mark,
94 struct fsnotify_mark *vfsmount_mark,
95 struct fsnotify_event *event)
96{
97 struct inotify_inode_mark *i_mark;
39 struct inode *to_tell; 98 struct inode *to_tell;
40 struct inotify_event_private_data *event_priv; 99 struct inotify_event_private_data *event_priv;
41 struct fsnotify_event_private_data *fsn_event_priv; 100 struct fsnotify_event_private_data *fsn_event_priv;
42 int wd, ret; 101 struct fsnotify_event *added_event;
102 int wd, ret = 0;
103
104 BUG_ON(vfsmount_mark);
105
106 pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
107 event, event->to_tell, event->mask);
43 108
44 to_tell = event->to_tell; 109 to_tell = event->to_tell;
45 110
46 spin_lock(&to_tell->i_lock); 111 i_mark = container_of(inode_mark, struct inotify_inode_mark,
47 entry = fsnotify_find_mark_entry(group, to_tell); 112 fsn_mark);
48 spin_unlock(&to_tell->i_lock); 113 wd = i_mark->wd;
49 /* race with watch removal? We already passes should_send */
50 if (unlikely(!entry))
51 return 0;
52 ientry = container_of(entry, struct inotify_inode_mark_entry,
53 fsn_entry);
54 wd = ientry->wd;
55 114
56 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL); 115 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
57 if (unlikely(!event_priv)) 116 if (unlikely(!event_priv))
@@ -62,48 +121,40 @@ static int inotify_handle_event(struct fsnotify_group *group, struct fsnotify_ev
62 fsn_event_priv->group = group; 121 fsn_event_priv->group = group;
63 event_priv->wd = wd; 122 event_priv->wd = wd;
64 123
65 ret = fsnotify_add_notify_event(group, event, fsn_event_priv); 124 added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
66 if (ret) { 125 if (added_event) {
67 inotify_free_event_priv(fsn_event_priv); 126 inotify_free_event_priv(fsn_event_priv);
68 /* EEXIST says we tail matched, EOVERFLOW isn't something 127 if (!IS_ERR(added_event))
69 * to report up the stack. */ 128 fsnotify_put_event(added_event);
70 if ((ret == -EEXIST) || 129 else
71 (ret == -EOVERFLOW)) 130 ret = PTR_ERR(added_event);
72 ret = 0;
73 } 131 }
74 132
75 /* 133 if (inode_mark->mask & IN_ONESHOT)
76 * If we hold the entry until after the event is on the queue 134 fsnotify_destroy_mark(inode_mark);
77 * IN_IGNORED won't be able to pass this event in the queue
78 */
79 fsnotify_put_mark(entry);
80 135
81 return ret; 136 return ret;
82} 137}
83 138
84static void inotify_freeing_mark(struct fsnotify_mark_entry *entry, struct fsnotify_group *group) 139static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
85{ 140{
86 inotify_ignored_and_remove_idr(entry, group); 141 inotify_ignored_and_remove_idr(fsn_mark, group);
87} 142}
88 143
89static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode, __u32 mask) 144static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
145 struct fsnotify_mark *inode_mark,
146 struct fsnotify_mark *vfsmount_mark,
147 __u32 mask, void *data, int data_type)
90{ 148{
91 struct fsnotify_mark_entry *entry; 149 if ((inode_mark->mask & FS_EXCL_UNLINK) &&
92 bool send; 150 (data_type == FSNOTIFY_EVENT_FILE)) {
93 151 struct file *file = data;
94 spin_lock(&inode->i_lock);
95 entry = fsnotify_find_mark_entry(group, inode);
96 spin_unlock(&inode->i_lock);
97 if (!entry)
98 return false;
99 152
100 mask = (mask & ~FS_EVENT_ON_CHILD); 153 if (d_unlinked(file->f_path.dentry))
101 send = (entry->mask & mask); 154 return false;
102 155 }
103 /* find took a reference */
104 fsnotify_put_mark(entry);
105 156
106 return send; 157 return true;
107} 158}
108 159
109/* 160/*
@@ -115,18 +166,18 @@ static bool inotify_should_send_event(struct fsnotify_group *group, struct inode
115 */ 166 */
116static int idr_callback(int id, void *p, void *data) 167static int idr_callback(int id, void *p, void *data)
117{ 168{
118 struct fsnotify_mark_entry *entry; 169 struct fsnotify_mark *fsn_mark;
119 struct inotify_inode_mark_entry *ientry; 170 struct inotify_inode_mark *i_mark;
120 static bool warned = false; 171 static bool warned = false;
121 172
122 if (warned) 173 if (warned)
123 return 0; 174 return 0;
124 175
125 warned = true; 176 warned = true;
126 entry = p; 177 fsn_mark = p;
127 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 178 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
128 179
129 WARN(1, "inotify closing but id=%d for entry=%p in group=%p still in " 180 WARN(1, "inotify closing but id=%d for fsn_mark=%p in group=%p still in "
130 "idr. Probably leaking memory\n", id, p, data); 181 "idr. Probably leaking memory\n", id, p, data);
131 182
132 /* 183 /*
@@ -135,9 +186,9 @@ static int idr_callback(int id, void *p, void *data)
135 * out why we got here and the panic is no worse than the original 186 * out why we got here and the panic is no worse than the original
136 * BUG() that was here. 187 * BUG() that was here.
137 */ 188 */
138 if (entry) 189 if (fsn_mark)
139 printk(KERN_WARNING "entry->group=%p inode=%p wd=%d\n", 190 printk(KERN_WARNING "fsn_mark->group=%p inode=%p wd=%d\n",
140 entry->group, entry->inode, ientry->wd); 191 fsn_mark->group, fsn_mark->i.inode, i_mark->wd);
141 return 0; 192 return 0;
142} 193}
143 194
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index e46ca685b9be..bf7f6d776c31 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -46,17 +46,11 @@
46/* these are configurable via /proc/sys/fs/inotify/ */ 46/* these are configurable via /proc/sys/fs/inotify/ */
47static int inotify_max_user_instances __read_mostly; 47static int inotify_max_user_instances __read_mostly;
48static int inotify_max_queued_events __read_mostly; 48static int inotify_max_queued_events __read_mostly;
49int inotify_max_user_watches __read_mostly; 49static int inotify_max_user_watches __read_mostly;
50 50
51static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; 51static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
52struct kmem_cache *event_priv_cachep __read_mostly; 52struct kmem_cache *event_priv_cachep __read_mostly;
53 53
54/*
55 * When inotify registers a new group it increments this and uses that
56 * value as an offset to set the fsnotify group "name" and priority.
57 */
58static atomic_t inotify_grp_num;
59
60#ifdef CONFIG_SYSCTL 54#ifdef CONFIG_SYSCTL
61 55
62#include <linux/sysctl.h> 56#include <linux/sysctl.h>
@@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg)
96{ 90{
97 __u32 mask; 91 __u32 mask;
98 92
99 /* everything should accept their own ignored and cares about children */ 93 /*
100 mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); 94 * everything should accept their own ignored, cares about children,
95 * and should receive events when the inode is unmounted
96 */
97 mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT);
101 98
102 /* mask off the flags used to open the fd */ 99 /* mask off the flags used to open the fd */
103 mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); 100 mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK));
104 101
105 return mask; 102 return mask;
106} 103}
@@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
144 141
145 event = fsnotify_peek_notify_event(group); 142 event = fsnotify_peek_notify_event(group);
146 143
144 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
145
147 if (event->name_len) 146 if (event->name_len)
148 event_size += roundup(event->name_len + 1, event_size); 147 event_size += roundup(event->name_len + 1, event_size);
149 148
@@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
173 size_t event_size = sizeof(struct inotify_event); 172 size_t event_size = sizeof(struct inotify_event);
174 size_t name_len = 0; 173 size_t name_len = 0;
175 174
175 pr_debug("%s: group=%p event=%p\n", __func__, group, event);
176
176 /* we get the inotify watch descriptor from the event private data */ 177 /* we get the inotify watch descriptor from the event private data */
177 spin_lock(&event->lock); 178 spin_lock(&event->lock);
178 fsn_priv = fsnotify_remove_priv_from_event(group, event); 179 fsn_priv = fsnotify_remove_priv_from_event(group, event);
@@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
245 kevent = get_one_event(group, count); 246 kevent = get_one_event(group, count);
246 mutex_unlock(&group->notification_mutex); 247 mutex_unlock(&group->notification_mutex);
247 248
249 pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent);
250
248 if (kevent) { 251 if (kevent) {
249 ret = PTR_ERR(kevent); 252 ret = PTR_ERR(kevent);
250 if (IS_ERR(kevent)) 253 if (IS_ERR(kevent))
@@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file)
289 struct fsnotify_group *group = file->private_data; 292 struct fsnotify_group *group = file->private_data;
290 struct user_struct *user = group->inotify_data.user; 293 struct user_struct *user = group->inotify_data.user;
291 294
295 pr_debug("%s: group=%p\n", __func__, group);
296
292 fsnotify_clear_marks_by_group(group); 297 fsnotify_clear_marks_by_group(group);
293 298
294 /* free this group, matching get was inotify_init->fsnotify_obtain_group */ 299 /* free this group, matching get was inotify_init->fsnotify_obtain_group */
@@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
312 group = file->private_data; 317 group = file->private_data;
313 p = (void __user *) arg; 318 p = (void __user *) arg;
314 319
320 pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd);
321
315 switch (cmd) { 322 switch (cmd) {
316 case FIONREAD: 323 case FIONREAD:
317 mutex_lock(&group->notification_mutex); 324 mutex_lock(&group->notification_mutex);
@@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns
357 return error; 364 return error;
358} 365}
359 366
367static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock,
368 int *last_wd,
369 struct inotify_inode_mark *i_mark)
370{
371 int ret;
372
373 do {
374 if (unlikely(!idr_pre_get(idr, GFP_KERNEL)))
375 return -ENOMEM;
376
377 spin_lock(idr_lock);
378 ret = idr_get_new_above(idr, i_mark, *last_wd + 1,
379 &i_mark->wd);
380 /* we added the mark to the idr, take a reference */
381 if (!ret) {
382 *last_wd = i_mark->wd;
383 fsnotify_get_mark(&i_mark->fsn_mark);
384 }
385 spin_unlock(idr_lock);
386 } while (ret == -EAGAIN);
387
388 return ret;
389}
390
391static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group,
392 int wd)
393{
394 struct idr *idr = &group->inotify_data.idr;
395 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
396 struct inotify_inode_mark *i_mark;
397
398 assert_spin_locked(idr_lock);
399
400 i_mark = idr_find(idr, wd);
401 if (i_mark) {
402 struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark;
403
404 fsnotify_get_mark(fsn_mark);
405 /* One ref for being in the idr, one ref we just took */
406 BUG_ON(atomic_read(&fsn_mark->refcnt) < 2);
407 }
408
409 return i_mark;
410}
411
412static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group,
413 int wd)
414{
415 struct inotify_inode_mark *i_mark;
416 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
417
418 spin_lock(idr_lock);
419 i_mark = inotify_idr_find_locked(group, wd);
420 spin_unlock(idr_lock);
421
422 return i_mark;
423}
424
425static void do_inotify_remove_from_idr(struct fsnotify_group *group,
426 struct inotify_inode_mark *i_mark)
427{
428 struct idr *idr = &group->inotify_data.idr;
429 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
430 int wd = i_mark->wd;
431
432 assert_spin_locked(idr_lock);
433
434 idr_remove(idr, wd);
435
436 /* removed from the idr, drop that ref */
437 fsnotify_put_mark(&i_mark->fsn_mark);
438}
439
360/* 440/*
361 * Remove the mark from the idr (if present) and drop the reference 441 * Remove the mark from the idr (if present) and drop the reference
362 * on the mark because it was in the idr. 442 * on the mark because it was in the idr.
363 */ 443 */
364static void inotify_remove_from_idr(struct fsnotify_group *group, 444static void inotify_remove_from_idr(struct fsnotify_group *group,
365 struct inotify_inode_mark_entry *ientry) 445 struct inotify_inode_mark *i_mark)
366{ 446{
367 struct idr *idr; 447 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
368 struct fsnotify_mark_entry *entry; 448 struct inotify_inode_mark *found_i_mark = NULL;
369 struct inotify_inode_mark_entry *found_ientry;
370 int wd; 449 int wd;
371 450
372 spin_lock(&group->inotify_data.idr_lock); 451 spin_lock(idr_lock);
373 idr = &group->inotify_data.idr; 452 wd = i_mark->wd;
374 wd = ientry->wd;
375 453
376 if (wd == -1) 454 /*
455 * does this i_mark think it is in the idr? we shouldn't get called
456 * if it wasn't....
457 */
458 if (wd == -1) {
459 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
460 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
461 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
377 goto out; 462 goto out;
463 }
378 464
379 entry = idr_find(&group->inotify_data.idr, wd); 465 /* Lets look in the idr to see if we find it */
380 if (unlikely(!entry)) 466 found_i_mark = inotify_idr_find_locked(group, wd);
467 if (unlikely(!found_i_mark)) {
468 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
469 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
470 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
381 goto out; 471 goto out;
472 }
382 473
383 found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 474 /*
384 if (unlikely(found_ientry != ientry)) { 475 * We found an mark in the idr at the right wd, but it's
385 /* We found an entry in the idr with the right wd, but it's 476 * not the mark we were told to remove. eparis seriously
386 * not the entry we were told to remove. eparis seriously 477 * fucked up somewhere.
387 * fucked up somewhere. */ 478 */
388 WARN_ON(1); 479 if (unlikely(found_i_mark != i_mark)) {
389 ientry->wd = -1; 480 WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p "
481 "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d "
482 "found_i_mark->group=%p found_i_mark->inode=%p\n",
483 __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group,
484 i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd,
485 found_i_mark->fsn_mark.group,
486 found_i_mark->fsn_mark.i.inode);
390 goto out; 487 goto out;
391 } 488 }
392 489
393 /* One ref for being in the idr, one ref held by the caller */ 490 /*
394 BUG_ON(atomic_read(&entry->refcnt) < 2); 491 * One ref for being in the idr
395 492 * one ref held by the caller trying to kill us
396 idr_remove(idr, wd); 493 * one ref grabbed by inotify_idr_find
397 ientry->wd = -1; 494 */
495 if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) {
496 printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p"
497 " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd,
498 i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode);
499 /* we can't really recover with bad ref cnting.. */
500 BUG();
501 }
398 502
399 /* removed from the idr, drop that ref */ 503 do_inotify_remove_from_idr(group, i_mark);
400 fsnotify_put_mark(entry);
401out: 504out:
402 spin_unlock(&group->inotify_data.idr_lock); 505 /* match the ref taken by inotify_idr_find_locked() */
506 if (found_i_mark)
507 fsnotify_put_mark(&found_i_mark->fsn_mark);
508 i_mark->wd = -1;
509 spin_unlock(idr_lock);
403} 510}
404 511
405/* 512/*
406 * Send IN_IGNORED for this wd, remove this wd from the idr. 513 * Send IN_IGNORED for this wd, remove this wd from the idr.
407 */ 514 */
408void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, 515void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
409 struct fsnotify_group *group) 516 struct fsnotify_group *group)
410{ 517{
411 struct inotify_inode_mark_entry *ientry; 518 struct inotify_inode_mark *i_mark;
412 struct fsnotify_event *ignored_event; 519 struct fsnotify_event *ignored_event, *notify_event;
413 struct inotify_event_private_data *event_priv; 520 struct inotify_event_private_data *event_priv;
414 struct fsnotify_event_private_data *fsn_event_priv; 521 struct fsnotify_event_private_data *fsn_event_priv;
415 int ret; 522 int ret;
@@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
420 if (!ignored_event) 527 if (!ignored_event)
421 return; 528 return;
422 529
423 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 530 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
424 531
425 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); 532 event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
426 if (unlikely(!event_priv)) 533 if (unlikely(!event_priv))
@@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
429 fsn_event_priv = &event_priv->fsnotify_event_priv_data; 536 fsn_event_priv = &event_priv->fsnotify_event_priv_data;
430 537
431 fsn_event_priv->group = group; 538 fsn_event_priv->group = group;
432 event_priv->wd = ientry->wd; 539 event_priv->wd = i_mark->wd;
433 540
434 ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); 541 notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
435 if (ret) 542 if (notify_event) {
543 if (IS_ERR(notify_event))
544 ret = PTR_ERR(notify_event);
545 else
546 fsnotify_put_event(notify_event);
436 inotify_free_event_priv(fsn_event_priv); 547 inotify_free_event_priv(fsn_event_priv);
548 }
437 549
438skip_send_ignore: 550skip_send_ignore:
439 551
440 /* matches the reference taken when the event was created */ 552 /* matches the reference taken when the event was created */
441 fsnotify_put_event(ignored_event); 553 fsnotify_put_event(ignored_event);
442 554
443 /* remove this entry from the idr */ 555 /* remove this mark from the idr */
444 inotify_remove_from_idr(group, ientry); 556 inotify_remove_from_idr(group, i_mark);
445 557
446 atomic_dec(&group->inotify_data.user->inotify_watches); 558 atomic_dec(&group->inotify_data.user->inotify_watches);
447} 559}
448 560
449/* ding dong the mark is dead */ 561/* ding dong the mark is dead */
450static void inotify_free_mark(struct fsnotify_mark_entry *entry) 562static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
451{ 563{
452 struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; 564 struct inotify_inode_mark *i_mark;
565
566 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
453 567
454 kmem_cache_free(inotify_inode_mark_cachep, ientry); 568 kmem_cache_free(inotify_inode_mark_cachep, i_mark);
455} 569}
456 570
457static int inotify_update_existing_watch(struct fsnotify_group *group, 571static int inotify_update_existing_watch(struct fsnotify_group *group,
458 struct inode *inode, 572 struct inode *inode,
459 u32 arg) 573 u32 arg)
460{ 574{
461 struct fsnotify_mark_entry *entry; 575 struct fsnotify_mark *fsn_mark;
462 struct inotify_inode_mark_entry *ientry; 576 struct inotify_inode_mark *i_mark;
463 __u32 old_mask, new_mask; 577 __u32 old_mask, new_mask;
464 __u32 mask; 578 __u32 mask;
465 int add = (arg & IN_MASK_ADD); 579 int add = (arg & IN_MASK_ADD);
@@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group,
467 581
468 /* don't allow invalid bits: we don't want flags set */ 582 /* don't allow invalid bits: we don't want flags set */
469 mask = inotify_arg_to_mask(arg); 583 mask = inotify_arg_to_mask(arg);
470 if (unlikely(!mask)) 584 if (unlikely(!(mask & IN_ALL_EVENTS)))
471 return -EINVAL; 585 return -EINVAL;
472 586
473 spin_lock(&inode->i_lock); 587 fsn_mark = fsnotify_find_inode_mark(group, inode);
474 entry = fsnotify_find_mark_entry(group, inode); 588 if (!fsn_mark)
475 spin_unlock(&inode->i_lock);
476 if (!entry)
477 return -ENOENT; 589 return -ENOENT;
478 590
479 ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); 591 i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
480 592
481 spin_lock(&entry->lock); 593 spin_lock(&fsn_mark->lock);
482 594
483 old_mask = entry->mask; 595 old_mask = fsn_mark->mask;
484 if (add) { 596 if (add)
485 entry->mask |= mask; 597 fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask));
486 new_mask = entry->mask; 598 else
487 } else { 599 fsnotify_set_mark_mask_locked(fsn_mark, mask);
488 entry->mask = mask; 600 new_mask = fsn_mark->mask;
489 new_mask = entry->mask;
490 }
491 601
492 spin_unlock(&entry->lock); 602 spin_unlock(&fsn_mark->lock);
493 603
494 if (old_mask != new_mask) { 604 if (old_mask != new_mask) {
495 /* more bits in old than in new? */ 605 /* more bits in old than in new? */
496 int dropped = (old_mask & ~new_mask); 606 int dropped = (old_mask & ~new_mask);
497 /* more bits in this entry than the inode's mask? */ 607 /* more bits in this fsn_mark than the inode's mask? */
498 int do_inode = (new_mask & ~inode->i_fsnotify_mask); 608 int do_inode = (new_mask & ~inode->i_fsnotify_mask);
499 /* more bits in this entry than the group? */
500 int do_group = (new_mask & ~group->mask);
501 609
502 /* update the inode with this new entry */ 610 /* update the inode with this new fsn_mark */
503 if (dropped || do_inode) 611 if (dropped || do_inode)
504 fsnotify_recalc_inode_mask(inode); 612 fsnotify_recalc_inode_mask(inode);
505 613
506 /* update the group mask with the new mask */
507 if (dropped || do_group)
508 fsnotify_recalc_group_mask(group);
509 } 614 }
510 615
511 /* return the wd */ 616 /* return the wd */
512 ret = ientry->wd; 617 ret = i_mark->wd;
513 618
514 /* match the get from fsnotify_find_mark_entry() */ 619 /* match the get from fsnotify_find_mark() */
515 fsnotify_put_mark(entry); 620 fsnotify_put_mark(fsn_mark);
516 621
517 return ret; 622 return ret;
518} 623}
@@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group,
521 struct inode *inode, 626 struct inode *inode,
522 u32 arg) 627 u32 arg)
523{ 628{
524 struct inotify_inode_mark_entry *tmp_ientry; 629 struct inotify_inode_mark *tmp_i_mark;
525 __u32 mask; 630 __u32 mask;
526 int ret; 631 int ret;
632 struct idr *idr = &group->inotify_data.idr;
633 spinlock_t *idr_lock = &group->inotify_data.idr_lock;
527 634
528 /* don't allow invalid bits: we don't want flags set */ 635 /* don't allow invalid bits: we don't want flags set */
529 mask = inotify_arg_to_mask(arg); 636 mask = inotify_arg_to_mask(arg);
530 if (unlikely(!mask)) 637 if (unlikely(!(mask & IN_ALL_EVENTS)))
531 return -EINVAL; 638 return -EINVAL;
532 639
533 tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); 640 tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL);
534 if (unlikely(!tmp_ientry)) 641 if (unlikely(!tmp_i_mark))
535 return -ENOMEM; 642 return -ENOMEM;
536 643
537 fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); 644 fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark);
538 tmp_ientry->fsn_entry.mask = mask; 645 tmp_i_mark->fsn_mark.mask = mask;
539 tmp_ientry->wd = -1; 646 tmp_i_mark->wd = -1;
540 647
541 ret = -ENOSPC; 648 ret = -ENOSPC;
542 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) 649 if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches)
543 goto out_err; 650 goto out_err;
544retry:
545 ret = -ENOMEM;
546 if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL)))
547 goto out_err;
548 651
549 /* we are putting the mark on the idr, take a reference */ 652 ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd,
550 fsnotify_get_mark(&tmp_ientry->fsn_entry); 653 tmp_i_mark);
551 654 if (ret)
552 spin_lock(&group->inotify_data.idr_lock);
553 ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
554 group->inotify_data.last_wd+1,
555 &tmp_ientry->wd);
556 spin_unlock(&group->inotify_data.idr_lock);
557 if (ret) {
558 /* we didn't get on the idr, drop the idr reference */
559 fsnotify_put_mark(&tmp_ientry->fsn_entry);
560
561 /* idr was out of memory allocate and try again */
562 if (ret == -EAGAIN)
563 goto retry;
564 goto out_err; 655 goto out_err;
565 }
566 656
567 /* we are on the idr, now get on the inode */ 657 /* we are on the idr, now get on the inode */
568 ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); 658 ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0);
569 if (ret) { 659 if (ret) {
570 /* we failed to get on the inode, get off the idr */ 660 /* we failed to get on the inode, get off the idr */
571 inotify_remove_from_idr(group, tmp_ientry); 661 inotify_remove_from_idr(group, tmp_i_mark);
572 goto out_err; 662 goto out_err;
573 } 663 }
574 664
575 /* update the idr hint, who cares about races, it's just a hint */
576 group->inotify_data.last_wd = tmp_ientry->wd;
577
578 /* increment the number of watches the user has */ 665 /* increment the number of watches the user has */
579 atomic_inc(&group->inotify_data.user->inotify_watches); 666 atomic_inc(&group->inotify_data.user->inotify_watches);
580 667
581 /* return the watch descriptor for this new entry */ 668 /* return the watch descriptor for this new mark */
582 ret = tmp_ientry->wd; 669 ret = tmp_i_mark->wd;
583
584 /* if this mark added a new event update the group mask */
585 if (mask & ~group->mask)
586 fsnotify_recalc_group_mask(group);
587 670
588out_err: 671out_err:
589 /* match the ref from fsnotify_init_markentry() */ 672 /* match the ref from fsnotify_init_mark() */
590 fsnotify_put_mark(&tmp_ientry->fsn_entry); 673 fsnotify_put_mark(&tmp_i_mark->fsn_mark);
591 674
592 return ret; 675 return ret;
593} 676}
@@ -616,11 +699,8 @@ retry:
616static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) 699static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events)
617{ 700{
618 struct fsnotify_group *group; 701 struct fsnotify_group *group;
619 unsigned int grp_num;
620 702
621 /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ 703 group = fsnotify_alloc_group(&inotify_fsnotify_ops);
622 grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num));
623 group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops);
624 if (IS_ERR(group)) 704 if (IS_ERR(group))
625 return group; 705 return group;
626 706
@@ -726,7 +806,7 @@ fput_and_out:
726SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) 806SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
727{ 807{
728 struct fsnotify_group *group; 808 struct fsnotify_group *group;
729 struct fsnotify_mark_entry *entry; 809 struct inotify_inode_mark *i_mark;
730 struct file *filp; 810 struct file *filp;
731 int ret = 0, fput_needed; 811 int ret = 0, fput_needed;
732 812
@@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd)
735 return -EBADF; 815 return -EBADF;
736 816
737 /* verify that this is indeed an inotify instance */ 817 /* verify that this is indeed an inotify instance */
738 if (unlikely(filp->f_op != &inotify_fops)) { 818 ret = -EINVAL;
739 ret = -EINVAL; 819 if (unlikely(filp->f_op != &inotify_fops))
740 goto out; 820 goto out;
741 }
742 821
743 group = filp->private_data; 822 group = filp->private_data;
744 823
745 spin_lock(&group->inotify_data.idr_lock); 824 ret = -EINVAL;
746 entry = idr_find(&group->inotify_data.idr, wd); 825 i_mark = inotify_idr_find(group, wd);
747 if (unlikely(!entry)) { 826 if (unlikely(!i_mark))
748 spin_unlock(&group->inotify_data.idr_lock);
749 ret = -EINVAL;
750 goto out; 827 goto out;
751 }
752 fsnotify_get_mark(entry);
753 spin_unlock(&group->inotify_data.idr_lock);
754 828
755 fsnotify_destroy_mark_by_entry(entry); 829 ret = 0;
756 fsnotify_put_mark(entry); 830
831 fsnotify_destroy_mark(&i_mark->fsn_mark);
832
833 /* match ref taken by inotify_idr_find */
834 fsnotify_put_mark(&i_mark->fsn_mark);
757 835
758out: 836out:
759 fput_light(filp, fput_needed); 837 fput_light(filp, fput_needed);
@@ -767,7 +845,28 @@ out:
767 */ 845 */
768static int __init inotify_user_setup(void) 846static int __init inotify_user_setup(void)
769{ 847{
770 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); 848 BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
849 BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
850 BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
851 BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE);
852 BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
853 BUILD_BUG_ON(IN_OPEN != FS_OPEN);
854 BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM);
855 BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO);
856 BUILD_BUG_ON(IN_CREATE != FS_CREATE);
857 BUILD_BUG_ON(IN_DELETE != FS_DELETE);
858 BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF);
859 BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF);
860 BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT);
861 BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
862 BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
863 BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
864 BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
865 BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
866
867 BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
868
869 inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
771 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); 870 event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
772 871
773 inotify_max_queued_events = 16384; 872 inotify_max_queued_events = 16384;