diff options
author | Nick Piggin <nickpiggin@yahoo.com.au> | 2006-03-25 06:07:09 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-25 11:22:53 -0500 |
commit | c32ccd87bfd1414b0aabfcd8dbc7539ad23bcbaa (patch) | |
tree | 612dc637976cbe36e8b72924a1f7bd76e75463fd | |
parent | bf36b9011e3c5b2739f9da2f6de8a6fa3edded32 (diff) |
[PATCH] inotify: lock avoidance with parent watch status in dentry
Previous inotify work avoidance is good when inotify is completely unused,
but it breaks down if even a single watch is in place anywhere in the
system. Robin Holt notices that udev is one such culprit - it slows down a
512-thread application on a 512 CPU system from 6 seconds to 22 minutes.
Solve this by adding a flag in the dentry that tells inotify whether or not
its parent inode has a watch on it. Event queueing to parent will skip
taking locks if this flag is cleared. Setting and clearing of this flag on
all child dentries versus event delivery: this is no in terms of race
cases, and that was shown to be equivalent to always performing the check.
The essential behaviour is that activity occuring _after_ a watch has been
added and _before_ it has been removed, will generate events.
Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Robert Love <rml@novell.com>
Cc: John McCutchan <ttb@tentacle.dhs.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | fs/dcache.c | 8 | ||||
-rw-r--r-- | fs/inotify.c | 87 | ||||
-rw-r--r-- | include/linux/dcache.h | 2 | ||||
-rw-r--r-- | include/linux/fsnotify.h | 19 | ||||
-rw-r--r-- | include/linux/inotify.h | 11 |
5 files changed, 117 insertions, 10 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 139e5fd22fa6..0f7ec12d65ff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -802,6 +802,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode) | |||
802 | if (inode) | 802 | if (inode) |
803 | list_add(&entry->d_alias, &inode->i_dentry); | 803 | list_add(&entry->d_alias, &inode->i_dentry); |
804 | entry->d_inode = inode; | 804 | entry->d_inode = inode; |
805 | fsnotify_d_instantiate(entry, inode); | ||
805 | spin_unlock(&dcache_lock); | 806 | spin_unlock(&dcache_lock); |
806 | security_d_instantiate(entry, inode); | 807 | security_d_instantiate(entry, inode); |
807 | } | 808 | } |
@@ -853,6 +854,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) | |||
853 | list_add(&entry->d_alias, &inode->i_dentry); | 854 | list_add(&entry->d_alias, &inode->i_dentry); |
854 | do_negative: | 855 | do_negative: |
855 | entry->d_inode = inode; | 856 | entry->d_inode = inode; |
857 | fsnotify_d_instantiate(entry, inode); | ||
856 | spin_unlock(&dcache_lock); | 858 | spin_unlock(&dcache_lock); |
857 | security_d_instantiate(entry, inode); | 859 | security_d_instantiate(entry, inode); |
858 | return NULL; | 860 | return NULL; |
@@ -983,6 +985,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
983 | new = __d_find_alias(inode, 1); | 985 | new = __d_find_alias(inode, 1); |
984 | if (new) { | 986 | if (new) { |
985 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); | 987 | BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); |
988 | fsnotify_d_instantiate(new, inode); | ||
986 | spin_unlock(&dcache_lock); | 989 | spin_unlock(&dcache_lock); |
987 | security_d_instantiate(new, inode); | 990 | security_d_instantiate(new, inode); |
988 | d_rehash(dentry); | 991 | d_rehash(dentry); |
@@ -992,6 +995,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
992 | /* d_instantiate takes dcache_lock, so we do it by hand */ | 995 | /* d_instantiate takes dcache_lock, so we do it by hand */ |
993 | list_add(&dentry->d_alias, &inode->i_dentry); | 996 | list_add(&dentry->d_alias, &inode->i_dentry); |
994 | dentry->d_inode = inode; | 997 | dentry->d_inode = inode; |
998 | fsnotify_d_instantiate(dentry, inode); | ||
995 | spin_unlock(&dcache_lock); | 999 | spin_unlock(&dcache_lock); |
996 | security_d_instantiate(dentry, inode); | 1000 | security_d_instantiate(dentry, inode); |
997 | d_rehash(dentry); | 1001 | d_rehash(dentry); |
@@ -1176,6 +1180,9 @@ void d_delete(struct dentry * dentry) | |||
1176 | spin_lock(&dentry->d_lock); | 1180 | spin_lock(&dentry->d_lock); |
1177 | isdir = S_ISDIR(dentry->d_inode->i_mode); | 1181 | isdir = S_ISDIR(dentry->d_inode->i_mode); |
1178 | if (atomic_read(&dentry->d_count) == 1) { | 1182 | if (atomic_read(&dentry->d_count) == 1) { |
1183 | /* remove this and other inotify debug checks after 2.6.18 */ | ||
1184 | dentry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; | ||
1185 | |||
1179 | dentry_iput(dentry); | 1186 | dentry_iput(dentry); |
1180 | fsnotify_nameremove(dentry, isdir); | 1187 | fsnotify_nameremove(dentry, isdir); |
1181 | return; | 1188 | return; |
@@ -1342,6 +1349,7 @@ already_unhashed: | |||
1342 | 1349 | ||
1343 | list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); | 1350 | list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs); |
1344 | spin_unlock(&target->d_lock); | 1351 | spin_unlock(&target->d_lock); |
1352 | fsnotify_d_move(dentry); | ||
1345 | spin_unlock(&dentry->d_lock); | 1353 | spin_unlock(&dentry->d_lock); |
1346 | write_sequnlock(&rename_lock); | 1354 | write_sequnlock(&rename_lock); |
1347 | spin_unlock(&dcache_lock); | 1355 | spin_unlock(&dcache_lock); |
diff --git a/fs/inotify.c b/fs/inotify.c index 0ee39ef591c6..a61e93e17853 100644 --- a/fs/inotify.c +++ b/fs/inotify.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <asm/ioctls.h> | 38 | #include <asm/ioctls.h> |
39 | 39 | ||
40 | static atomic_t inotify_cookie; | 40 | static atomic_t inotify_cookie; |
41 | static atomic_t inotify_watches; | ||
42 | 41 | ||
43 | static kmem_cache_t *watch_cachep; | 42 | static kmem_cache_t *watch_cachep; |
44 | static kmem_cache_t *event_cachep; | 43 | static kmem_cache_t *event_cachep; |
@@ -381,6 +380,48 @@ static int find_inode(const char __user *dirname, struct nameidata *nd, | |||
381 | } | 380 | } |
382 | 381 | ||
383 | /* | 382 | /* |
383 | * inotify_inode_watched - returns nonzero if there are watches on this inode | ||
384 | * and zero otherwise. We call this lockless, we do not care if we race. | ||
385 | */ | ||
386 | static inline int inotify_inode_watched(struct inode *inode) | ||
387 | { | ||
388 | return !list_empty(&inode->inotify_watches); | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * Get child dentry flag into synch with parent inode. | ||
393 | * Flag should always be clear for negative dentrys. | ||
394 | */ | ||
395 | static void set_dentry_child_flags(struct inode *inode, int watched) | ||
396 | { | ||
397 | struct dentry *alias; | ||
398 | |||
399 | spin_lock(&dcache_lock); | ||
400 | list_for_each_entry(alias, &inode->i_dentry, d_alias) { | ||
401 | struct dentry *child; | ||
402 | |||
403 | list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) { | ||
404 | if (!child->d_inode) { | ||
405 | WARN_ON(child->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); | ||
406 | continue; | ||
407 | } | ||
408 | spin_lock(&child->d_lock); | ||
409 | if (watched) { | ||
410 | WARN_ON(child->d_flags & | ||
411 | DCACHE_INOTIFY_PARENT_WATCHED); | ||
412 | child->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
413 | } else { | ||
414 | WARN_ON(!(child->d_flags & | ||
415 | DCACHE_INOTIFY_PARENT_WATCHED)); | ||
416 | child->d_flags&=~DCACHE_INOTIFY_PARENT_WATCHED; | ||
417 | } | ||
418 | spin_unlock(&child->d_lock); | ||
419 | } | ||
420 | } | ||
421 | spin_unlock(&dcache_lock); | ||
422 | } | ||
423 | |||
424 | /* | ||
384 | * create_watch - creates a watch on the given device. | 425 | * create_watch - creates a watch on the given device. |
385 | * | 426 | * |
386 | * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. | 427 | * Callers must hold dev->mutex. Calls inotify_dev_get_wd() so may sleep. |
@@ -426,7 +467,6 @@ static struct inotify_watch *create_watch(struct inotify_device *dev, | |||
426 | get_inotify_watch(watch); | 467 | get_inotify_watch(watch); |
427 | 468 | ||
428 | atomic_inc(&dev->user->inotify_watches); | 469 | atomic_inc(&dev->user->inotify_watches); |
429 | atomic_inc(&inotify_watches); | ||
430 | 470 | ||
431 | return watch; | 471 | return watch; |
432 | } | 472 | } |
@@ -458,8 +498,10 @@ static void remove_watch_no_event(struct inotify_watch *watch, | |||
458 | list_del(&watch->i_list); | 498 | list_del(&watch->i_list); |
459 | list_del(&watch->d_list); | 499 | list_del(&watch->d_list); |
460 | 500 | ||
501 | if (!inotify_inode_watched(watch->inode)) | ||
502 | set_dentry_child_flags(watch->inode, 0); | ||
503 | |||
461 | atomic_dec(&dev->user->inotify_watches); | 504 | atomic_dec(&dev->user->inotify_watches); |
462 | atomic_dec(&inotify_watches); | ||
463 | idr_remove(&dev->idr, watch->wd); | 505 | idr_remove(&dev->idr, watch->wd); |
464 | put_inotify_watch(watch); | 506 | put_inotify_watch(watch); |
465 | } | 507 | } |
@@ -481,16 +523,39 @@ static void remove_watch(struct inotify_watch *watch,struct inotify_device *dev) | |||
481 | remove_watch_no_event(watch, dev); | 523 | remove_watch_no_event(watch, dev); |
482 | } | 524 | } |
483 | 525 | ||
526 | /* Kernel API */ | ||
527 | |||
484 | /* | 528 | /* |
485 | * inotify_inode_watched - returns nonzero if there are watches on this inode | 529 | * inotify_d_instantiate - instantiate dcache entry for inode |
486 | * and zero otherwise. We call this lockless, we do not care if we race. | ||
487 | */ | 530 | */ |
488 | static inline int inotify_inode_watched(struct inode *inode) | 531 | void inotify_d_instantiate(struct dentry *entry, struct inode *inode) |
489 | { | 532 | { |
490 | return !list_empty(&inode->inotify_watches); | 533 | struct dentry *parent; |
534 | |||
535 | if (!inode) | ||
536 | return; | ||
537 | |||
538 | WARN_ON(entry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED); | ||
539 | spin_lock(&entry->d_lock); | ||
540 | parent = entry->d_parent; | ||
541 | if (inotify_inode_watched(parent->d_inode)) | ||
542 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
543 | spin_unlock(&entry->d_lock); | ||
491 | } | 544 | } |
492 | 545 | ||
493 | /* Kernel API */ | 546 | /* |
547 | * inotify_d_move - dcache entry has been moved | ||
548 | */ | ||
549 | void inotify_d_move(struct dentry *entry) | ||
550 | { | ||
551 | struct dentry *parent; | ||
552 | |||
553 | parent = entry->d_parent; | ||
554 | if (inotify_inode_watched(parent->d_inode)) | ||
555 | entry->d_flags |= DCACHE_INOTIFY_PARENT_WATCHED; | ||
556 | else | ||
557 | entry->d_flags &= ~DCACHE_INOTIFY_PARENT_WATCHED; | ||
558 | } | ||
494 | 559 | ||
495 | /** | 560 | /** |
496 | * inotify_inode_queue_event - queue an event to all watches on this inode | 561 | * inotify_inode_queue_event - queue an event to all watches on this inode |
@@ -538,7 +603,7 @@ void inotify_dentry_parent_queue_event(struct dentry *dentry, u32 mask, | |||
538 | struct dentry *parent; | 603 | struct dentry *parent; |
539 | struct inode *inode; | 604 | struct inode *inode; |
540 | 605 | ||
541 | if (!atomic_read (&inotify_watches)) | 606 | if (!(dentry->d_flags & DCACHE_INOTIFY_PARENT_WATCHED)) |
542 | return; | 607 | return; |
543 | 608 | ||
544 | spin_lock(&dentry->d_lock); | 609 | spin_lock(&dentry->d_lock); |
@@ -993,6 +1058,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) | |||
993 | goto out; | 1058 | goto out; |
994 | } | 1059 | } |
995 | 1060 | ||
1061 | if (!inotify_inode_watched(inode)) | ||
1062 | set_dentry_child_flags(inode, 1); | ||
1063 | |||
996 | /* Add the watch to the device's and the inode's list */ | 1064 | /* Add the watch to the device's and the inode's list */ |
997 | list_add(&watch->d_list, &dev->watches); | 1065 | list_add(&watch->d_list, &dev->watches); |
998 | list_add(&watch->i_list, &inode->inotify_watches); | 1066 | list_add(&watch->i_list, &inode->inotify_watches); |
@@ -1065,7 +1133,6 @@ static int __init inotify_setup(void) | |||
1065 | inotify_max_user_watches = 8192; | 1133 | inotify_max_user_watches = 8192; |
1066 | 1134 | ||
1067 | atomic_set(&inotify_cookie, 0); | 1135 | atomic_set(&inotify_cookie, 0); |
1068 | atomic_set(&inotify_watches, 0); | ||
1069 | 1136 | ||
1070 | watch_cachep = kmem_cache_create("inotify_watch_cache", | 1137 | watch_cachep = kmem_cache_create("inotify_watch_cache", |
1071 | sizeof(struct inotify_watch), | 1138 | sizeof(struct inotify_watch), |
diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4361f3789975..d10bd30c337e 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h | |||
@@ -162,6 +162,8 @@ d_iput: no no no yes | |||
162 | #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ | 162 | #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ |
163 | #define DCACHE_UNHASHED 0x0010 | 163 | #define DCACHE_UNHASHED 0x0010 |
164 | 164 | ||
165 | #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ | ||
166 | |||
165 | extern spinlock_t dcache_lock; | 167 | extern spinlock_t dcache_lock; |
166 | 168 | ||
167 | /** | 169 | /** |
diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 03b8e7932b83..f7e517c1f1bd 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h | |||
@@ -17,6 +17,25 @@ | |||
17 | #include <linux/inotify.h> | 17 | #include <linux/inotify.h> |
18 | 18 | ||
19 | /* | 19 | /* |
20 | * fsnotify_d_instantiate - instantiate a dentry for inode | ||
21 | * Called with dcache_lock held. | ||
22 | */ | ||
23 | static inline void fsnotify_d_instantiate(struct dentry *entry, | ||
24 | struct inode *inode) | ||
25 | { | ||
26 | inotify_d_instantiate(entry, inode); | ||
27 | } | ||
28 | |||
29 | /* | ||
30 | * fsnotify_d_move - entry has been moved | ||
31 | * Called with dcache_lock and entry->d_lock held. | ||
32 | */ | ||
33 | static inline void fsnotify_d_move(struct dentry *entry) | ||
34 | { | ||
35 | inotify_d_move(entry); | ||
36 | } | ||
37 | |||
38 | /* | ||
20 | * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir | 39 | * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir |
21 | */ | 40 | */ |
22 | static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, | 41 | static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, |
diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 267c88b5f742..09e00433c78e 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h | |||
@@ -71,6 +71,8 @@ struct inotify_event { | |||
71 | 71 | ||
72 | #ifdef CONFIG_INOTIFY | 72 | #ifdef CONFIG_INOTIFY |
73 | 73 | ||
74 | extern void inotify_d_instantiate(struct dentry *, struct inode *); | ||
75 | extern void inotify_d_move(struct dentry *); | ||
74 | extern void inotify_inode_queue_event(struct inode *, __u32, __u32, | 76 | extern void inotify_inode_queue_event(struct inode *, __u32, __u32, |
75 | const char *); | 77 | const char *); |
76 | extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, | 78 | extern void inotify_dentry_parent_queue_event(struct dentry *, __u32, __u32, |
@@ -81,6 +83,15 @@ extern u32 inotify_get_cookie(void); | |||
81 | 83 | ||
82 | #else | 84 | #else |
83 | 85 | ||
86 | static inline void inotify_d_instantiate(struct dentry *dentry, | ||
87 | struct inode *inode) | ||
88 | { | ||
89 | } | ||
90 | |||
91 | static inline void inotify_d_move(struct dentry *dentry) | ||
92 | { | ||
93 | } | ||
94 | |||
84 | static inline void inotify_inode_queue_event(struct inode *inode, | 95 | static inline void inotify_inode_queue_event(struct inode *inode, |
85 | __u32 mask, __u32 cookie, | 96 | __u32 mask, __u32 cookie, |
86 | const char *filename) | 97 | const char *filename) |