aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Piggin <nickpiggin@yahoo.com.au>2006-03-25 06:07:09 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-25 11:22:53 -0500
commitc32ccd87bfd1414b0aabfcd8dbc7539ad23bcbaa (patch)
tree612dc637976cbe36e8b72924a1f7bd76e75463fd
parentbf36b9011e3c5b2739f9da2f6de8a6fa3edded32 (diff)
[PATCH] inotify: lock avoidance with parent watch status in dentry
Previous inotify work avoidance is good when inotify is completely unused, but it breaks down if even a single watch is in place anywhere in the system. Robin Holt notices that udev is one such culprit - it slows down a 512-thread application on a 512 CPU system from 6 seconds to 22 minutes. Solve this by adding a flag in the dentry that tells inotify whether or not its parent inode has a watch on it. Event queueing to parent will skip taking locks if this flag is cleared. Setting and clearing of this flag on all child dentries versus event delivery: this is no in terms of race cases, and that was shown to be equivalent to always performing the check. The essential behaviour is that activity occuring _after_ a watch has been added and _before_ it has been removed, will generate events. Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Robert Love <rml@novell.com> Cc: John McCutchan <ttb@tentacle.dhs.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/dcache.c8
-rw-r--r--fs/inotify.c87
-rw-r--r--include/linux/dcache.h2
-rw-r--r--include/linux/fsnotify.h19
-rw-r--r--include/linux/inotify.h11
5 files changed, 117 insertions, 10 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 139e5fd22fa6..0f7ec12d65ff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
802 if (inode) 802 if (inode)
803 list_add(&entry->d_alias, &inode->i_dentry); 803 list_add(&entry->d_alias, &inode->i_dentry);
804 entry->d_inode = inode; 804 entry->d_inode = inode;
805 fsnotify_d_instantiate(entry, inode);
805 spin_unlock(&dcache_lock); 806 spin_unlock(&dcache_lock);
806 security_d_instantiate(entry, inode); 807 security_d_instantiate(entry, inode);
807} 808}
@@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
853 list_add(&entry->d_alias, &inode->i_dentry); 854 list_add(&entry->d_alias, &inode->i_dentry);
854do_negative: 855do_negative:
855 entry->d_inode = inode; 856 entry->d_inode = inode;
857 fsnotify_d_instantiate(entry, inode);
856 spin_unlock(&dcache_lock); 858 spin_unlock(&dcache_lock);
857 security_d_instantiate(entry, inode); 859 security_d_instantiate(entry, inode);
858 return NULL; 860 return NULL;
@@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
983 new = __d_find_alias(inode, 1); 985 new = __d_find_alias(inode, 1);
984 if (new) { 986 if (new) {
985 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 987 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
988 fsnotify_d_instantiate(new, inode);
986 spin_unlock(&dcache_lock); 989 spin_unlock(&dcache_lock);
987 security_d_instantiate(new, inode); 990 security_d_instantiate(new, inode);
988 d_rehash(dentry); 991 d_rehash(dentry);
@@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
992 /* d_instantiate takes dcache_lock, so we do it by hand */ 995 /* d_instantiate takes dcache_lock, so we do it by hand */
993 list_add(&dentry->d_alias, &inode->i_dentry); 996 list_add(&dentry->d_alias, &inode->i_dentry);
994 dentry->d_inode = inode; 997 dentry->d_inode = inode;
998 fsnotify_d_instantiate(dentry, inode);
995 spin_unlock(&dcache_lock); 999 spin_unlock(&dcache_lock);
996 security_d_instantiate(dentry, inode); 1000 security_d_instantiate(dentry, inode);
997 d_rehash(dentry); 1001 d_rehash(dentry);
@@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry)
1176 spin_lock(&dentry->d_lock); 1180 spin_lock(&dentry->d_lock);
1177 isdir = S_ISDIR(dentry->d_inode->i_mode); 1181 isdir = S_ISDIR(dentry->d_inode->i_mode);
1178 if (atomic_read(&dentry->d_count) == 1) { 1182 if (atomic_read(&dentry->d_count) == 1) {
1183 /* remove this and other inotify debug checks after 2.6.18 */
1184 dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
1185
1179 dentry_iput(dentry); 1186 dentry_iput(dentry);
1180 fsnotify_nameremove(dentry, isdir); 1187 fsnotify_nameremove(dentry, isdir);
1181 return; 1188 return;
@@ -1342,6 +1349,7 @@ already_unhashed:
1342 1349
1343 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); 1350 list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
1344 spin_unlock(&target->d_lock); 1351 spin_unlock(&target->d_lock);
1352 fsnotify_d_move(dentry);
1345 spin_unlock(&dentry->d_lock); 1353 spin_unlock(&dentry->d_lock);
1346 write_sequnlock(&rename_lock); 1354 write_sequnlock(&rename_lock);
1347 spin_unlock(&dcache_lock); 1355 spin_unlock(&dcache_lock);
diff --git a/fs/inotify.c b/fs/inotify.c
index 0ee39ef591c6..a61e93e17853 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -38,7 +38,6 @@
38#include <asm/ioctls.h> 38#include <asm/ioctls.h>
39 39
40static atomic_t inotify_cookie; 40static atomic_t inotify_cookie;
41static atomic_t inotify_watches;
42 41
43static kmem_cache_t *watch_cachep; 42static kmem_cache_t *watch_cachep;
44static kmem_cache_t *event_cachep; 43static kmem_cache_t *event_cachep;
@@ -381,6 +380,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd,
381} 380}
382 381
383/* 382/*
383 * inotify_inode_watched - returns nonzero if there are watches on this inode
384 * and zero otherwise. We call this lockless, we do not care if we race.
385 */
386static inline int inotify_inode_watched(struct inode *inode)
387{
388 return !list_empty(&inode->inotify_watches);
389}
390
391/*
392 * Get child dentry flag into synch with parent inode.
393 * Flag should always be clear for negative dentrys.
394 */
395static void set_dentry_child_flags(struct inode *inode, int watched)
396{
397 struct dentry *alias;
398
399 spin_lock(&dcache_lock);
400 list_for_each_entry(alias, &inode->i_dentry, d_alias) {
401 struct dentry *child;
402
403 list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
404 if (!child->d_inode) {
405 WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
406 continue;
407 }
408 spin_lock(&child->d_lock);
409 if (watched) {
410 WARN_ON(child->d_flags &
411 DCACHE_INOTIFY_PARENT_WATCHED);
412 child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
413 } else {
414 WARN_ON(!(child->d_flags &
415 DCACHE_INOTIFY_PARENT_WATCHED));
416 child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED;
417 }
418 spin_unlock(&child->d_lock);
419 }
420 }
421 spin_unlock(&dcache_lock);
422}
423
424/*
384 * create_watch - creates a watch on the given device. 425 * create_watch - creates a watch on the given device.
385 * 426 *
386 * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. 427 * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep.
@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev,
426 get_inotify_watch(watch); 467 get_inotify_watch(watch);
427 468
428 atomic_inc(&dev->user->inotify_watches); 469 atomic_inc(&dev->user->inotify_watches);
429 atomic_inc(&inotify_watches);
430 470
431 return watch; 471 return watch;
432} 472}
@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch,
458 list_del(&watch->i_list); 498 list_del(&watch->i_list);
459 list_del(&watch->d_list); 499 list_del(&watch->d_list);
460 500
501 if (!inotify_inode_watched(watch->inode))
502 set_dentry_child_flags(watch->inode, 0);
503
461 atomic_dec(&dev->user->inotify_watches); 504 atomic_dec(&dev->user->inotify_watches);
462 atomic_dec(&inotify_watches);
463 idr_remove(&dev->idr, watch->wd); 505 idr_remove(&dev->idr, watch->wd);
464 put_inotify_watch(watch); 506 put_inotify_watch(watch);
465} 507}
@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev)
481 remove_watch_no_event(watch, dev); 523 remove_watch_no_event(watch, dev);
482} 524}
483 525
526/* Kernel API */
527
484/* 528/*
485 * inotify_inode_watched - returns nonzero if there are watches on this inode 529 * inotify_d_instantiate - instantiate dcache entry for inode
486 * and zero otherwise. We call this lockless, we do not care if we race.
487 */ 530 */
488static inline int inotify_inode_watched(struct inode *inode) 531void inotify_d_instantiate(struct dentry *entry, struct inode *inode)
489{ 532{
490 return !list_empty(&inode->inotify_watches); 533 struct dentry *parent;
534
535 if (!inode)
536 return;
537
538 WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED);
539 spin_lock(&entry->d_lock);
540 parent = entry->d_parent;
541 if (inotify_inode_watched(parent->d_inode))
542 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
543 spin_unlock(&entry->d_lock);
491} 544}
492 545
493/* Kernel API */ 546/*
547 * inotify_d_move - dcache entry has been moved
548 */
549void inotify_d_move(struct dentry *entry)
550{
551 struct dentry *parent;
552
553 parent = entry->d_parent;
554 if (inotify_inode_watched(parent->d_inode))
555 entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED;
556 else
557 entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED;
558}
494 559
495/** 560/**
496 * inotify_inode_queue_event - queue an event to all watches on this inode 561 * inotify_inode_queue_event - queue an event to all watches on this inode
@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask,
538 struct dentry *parent; 603 struct dentry *parent;
539 struct inode *inode; 604 struct inode *inode;
540 605
541 if (!atomic_read (&inotify_watches)) 606 if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED))
542 return; 607 return;
543 608
544 spin_lock(&dentry->d_lock); 609 spin_lock(&dentry->d_lock);
@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
993 goto out; 1058 goto out;
994 } 1059 }
995 1060
1061 if (!inotify_inode_watched(inode))
1062 set_dentry_child_flags(inode, 1);
1063
996 /* Add the watch to the device's and the inode's list */ 1064 /* Add the watch to the device's and the inode's list */
997 list_add(&watch->d_list, &dev->watches); 1065 list_add(&watch->d_list, &dev->watches);
998 list_add(&watch->i_list, &inode->inotify_watches); 1066 list_add(&watch->i_list, &inode->inotify_watches);
@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void)
1065 inotify_max_user_watches = 8192; 1133 inotify_max_user_watches = 8192;
1066 1134
1067 atomic_set(&inotify_cookie, 0); 1135 atomic_set(&inotify_cookie, 0);
1068 atomic_set(&inotify_watches, 0);
1069 1136
1070 watch_cachep = kmem_cache_create("inotify_watch_cache", 1137 watch_cachep = kmem_cache_create("inotify_watch_cache",
1071 sizeof(struct inotify_watch), 1138 sizeof(struct inotify_watch),
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 4361f3789975..d10bd30c337e 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -162,6 +162,8 @@ d_iput: no no no yes
162#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ 162#define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
163#define DCACHE_UNHASHED 0x0010 163#define DCACHE_UNHASHED 0x0010
164 164
165#define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */
166
165extern spinlock_t dcache_lock; 167extern spinlock_t dcache_lock;
166 168
167/** 169/**
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h
index 03b8e7932b83..f7e517c1f1bd 100644
--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -17,6 +17,25 @@
17#include <linux/inotify.h> 17#include <linux/inotify.h>
18 18
19/* 19/*
20 * fsnotify_d_instantiate - instantiate a dentry for inode
21 * Called with dcache_lock held.
22 */
23static inline void fsnotify_d_instantiate(struct dentry *entry,
24 struct inode *inode)
25{
26 inotify_d_instantiate(entry, inode);
27}
28
29/*
30 * fsnotify_d_move - entry has been moved
31 * Called with dcache_lock and entry->d_lock held.
32 */
33static inline void fsnotify_d_move(struct dentry *entry)
34{
35 inotify_d_move(entry);
36}
37
38/*
20 * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir 39 * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
21 */ 40 */
22static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, 41static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 267c88b5f742..09e00433c78e 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -71,6 +71,8 @@ struct inotify_event {
71 71
72#ifdef CONFIG_INOTIFY 72#ifdef CONFIG_INOTIFY
73 73
74extern void inotify_d_instantiate(struct dentry *, struct inode *);
75extern void inotify_d_move(struct dentry *);
74extern void inotify_inode_queue_event(struct inode *, __u32, __u32, 76extern void inotify_inode_queue_event(struct inode *, __u32, __u32,
75 const char *); 77 const char *);
76extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, 78extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32,
@@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void);
81 83
82#else 84#else
83 85
86static inline void inotify_d_instantiate(struct dentry *dentry,
87 struct inode *inode)
88{
89}
90
91static inline void inotify_d_move(struct dentry *dentry)
92{
93}
94
84static inline void inotify_inode_queue_event(struct inode *inode, 95static inline void inotify_inode_queue_event(struct inode *inode,
85 __u32 mask, __u32 cookie, 96 __u32 mask, __u32 cookie,
86 const char *filename) 97 const char *filename)