diff options
author | Miklos Szeredi <mszeredi@suse.cz> | 2007-05-08 03:23:46 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-08 14:14:58 -0400 |
commit | d52b908646b88cb1952ab8c9b2d4423908a23f11 (patch) | |
tree | 0c60c3bdbffaf87c7d9cff30e5e598ce31d4789b | |
parent | 97dc32cdb1b53832801159d5f634b41aad9d0a23 (diff) |
fix quadratic behavior of shrink_dcache_parent()
The time shrink_dcache_parent() takes, grows quadratically with the depth
of the tree under 'parent'. This starts to get noticable at about 10,000.
These kinds of depths don't occur normally, and filesystems which invoke
shrink_dcache_parent() via d_invalidate() seem to have other depth
dependent timings, so it's not even easy to expose this problem.
However with FUSE it's easy to create a deep tree and d_invalidate()
will also get called. This can make a syscall hang for a very long
time.
This is the original discovery of the problem by Russ Cox:
http://article.gmane.org/gmane.comp.file-systems.fuse.devel/3826
The following patch fixes the quadratic behavior, by optionally allowing
prune_dcache() to prune ancestors of a dentry in one go, instead of doing
it one at a time.
Common code in dput() and prune_one_dentry() is extracted into a new helper
function d_kill().
shrink_dcache_parent() as well as shrink_dcache_sb() are converted to use
the ancestry-pruner option. Only for shrink_dcache_memory() is this
behavior not desirable, so it keeps using the old algorithm.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Maneesh Soni <maneesh@in.ibm.com>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/dcache.c | 104 |
1 files changed, 67 insertions, 37 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index d1bf5d8aeb5a..681cab81b454 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -121,6 +121,28 @@ static void dentry_iput(struct dentry * dentry) | |||
121 | } | 121 | } |
122 | } | 122 | } |
123 | 123 | ||
124 | /** | ||
125 | * d_kill - kill dentry and return parent | ||
126 | * @dentry: dentry to kill | ||
127 | * | ||
128 | * Called with dcache_lock and d_lock, releases both. The dentry must | ||
129 | * already be unhashed and removed from the LRU. | ||
130 | * | ||
131 | * If this is the root of the dentry tree, return NULL. | ||
132 | */ | ||
133 | static struct dentry *d_kill(struct dentry *dentry) | ||
134 | { | ||
135 | struct dentry *parent; | ||
136 | |||
137 | list_del(&dentry->d_u.d_child); | ||
138 | dentry_stat.nr_dentry--; /* For d_free, below */ | ||
139 | /*drops the locks, at that point nobody can reach this dentry */ | ||
140 | dentry_iput(dentry); | ||
141 | parent = dentry->d_parent; | ||
142 | d_free(dentry); | ||
143 | return dentry == parent ? NULL : parent; | ||
144 | } | ||
145 | |||
124 | /* | 146 | /* |
125 | * This is dput | 147 | * This is dput |
126 | * | 148 | * |
@@ -189,28 +211,17 @@ repeat: | |||
189 | 211 | ||
190 | unhash_it: | 212 | unhash_it: |
191 | __d_drop(dentry); | 213 | __d_drop(dentry); |
192 | 214 | kill_it: | |
193 | kill_it: { | 215 | /* If dentry was on d_lru list |
194 | struct dentry *parent; | 216 | * delete it from there |
195 | 217 | */ | |
196 | /* If dentry was on d_lru list | 218 | if (!list_empty(&dentry->d_lru)) { |
197 | * delete it from there | 219 | list_del(&dentry->d_lru); |
198 | */ | 220 | dentry_stat.nr_unused--; |
199 | if (!list_empty(&dentry->d_lru)) { | ||
200 | list_del(&dentry->d_lru); | ||
201 | dentry_stat.nr_unused--; | ||
202 | } | ||
203 | list_del(&dentry->d_u.d_child); | ||
204 | dentry_stat.nr_dentry--; /* For d_free, below */ | ||
205 | /*drops the locks, at that point nobody can reach this dentry */ | ||
206 | dentry_iput(dentry); | ||
207 | parent = dentry->d_parent; | ||
208 | d_free(dentry); | ||
209 | if (dentry == parent) | ||
210 | return; | ||
211 | dentry = parent; | ||
212 | goto repeat; | ||
213 | } | 221 | } |
222 | dentry = d_kill(dentry); | ||
223 | if (dentry) | ||
224 | goto repeat; | ||
214 | } | 225 | } |
215 | 226 | ||
216 | /** | 227 | /** |
@@ -371,22 +382,40 @@ restart: | |||
371 | * Throw away a dentry - free the inode, dput the parent. This requires that | 382 | * Throw away a dentry - free the inode, dput the parent. This requires that |
372 | * the LRU list has already been removed. | 383 | * the LRU list has already been removed. |
373 | * | 384 | * |
385 | * If prune_parents is true, try to prune ancestors as well. | ||
386 | * | ||
374 | * Called with dcache_lock, drops it and then regains. | 387 | * Called with dcache_lock, drops it and then regains. |
375 | * Called with dentry->d_lock held, drops it. | 388 | * Called with dentry->d_lock held, drops it. |
376 | */ | 389 | */ |
377 | static void prune_one_dentry(struct dentry * dentry) | 390 | static void prune_one_dentry(struct dentry * dentry, int prune_parents) |
378 | { | 391 | { |
379 | struct dentry * parent; | ||
380 | |||
381 | __d_drop(dentry); | 392 | __d_drop(dentry); |
382 | list_del(&dentry->d_u.d_child); | 393 | dentry = d_kill(dentry); |
383 | dentry_stat.nr_dentry--; /* For d_free, below */ | 394 | if (!prune_parents) { |
384 | dentry_iput(dentry); | 395 | dput(dentry); |
385 | parent = dentry->d_parent; | 396 | spin_lock(&dcache_lock); |
386 | d_free(dentry); | 397 | return; |
387 | if (parent != dentry) | 398 | } |
388 | dput(parent); | 399 | |
400 | /* | ||
401 | * Prune ancestors. Locking is simpler than in dput(), | ||
402 | * because dcache_lock needs to be taken anyway. | ||
403 | */ | ||
389 | spin_lock(&dcache_lock); | 404 | spin_lock(&dcache_lock); |
405 | while (dentry) { | ||
406 | if (!atomic_dec_and_lock(&dentry->d_count, &dentry->d_lock)) | ||
407 | return; | ||
408 | |||
409 | if (dentry->d_op && dentry->d_op->d_delete) | ||
410 | dentry->d_op->d_delete(dentry); | ||
411 | if (!list_empty(&dentry->d_lru)) { | ||
412 | list_del(&dentry->d_lru); | ||
413 | dentry_stat.nr_unused--; | ||
414 | } | ||
415 | __d_drop(dentry); | ||
416 | dentry = d_kill(dentry); | ||
417 | spin_lock(&dcache_lock); | ||
418 | } | ||
390 | } | 419 | } |
391 | 420 | ||
392 | /** | 421 | /** |
@@ -394,6 +423,7 @@ static void prune_one_dentry(struct dentry * dentry) | |||
394 | * @count: number of entries to try and free | 423 | * @count: number of entries to try and free |
395 | * @sb: if given, ignore dentries for other superblocks | 424 | * @sb: if given, ignore dentries for other superblocks |
396 | * which are being unmounted. | 425 | * which are being unmounted. |
426 | * @prune_parents: if true, try to prune ancestors as well in one go | ||
397 | * | 427 | * |
398 | * Shrink the dcache. This is done when we need | 428 | * Shrink the dcache. This is done when we need |
399 | * more memory, or simply when we need to unmount | 429 | * more memory, or simply when we need to unmount |
@@ -404,7 +434,7 @@ static void prune_one_dentry(struct dentry * dentry) | |||
404 | * all the dentries are in use. | 434 | * all the dentries are in use. |
405 | */ | 435 | */ |
406 | 436 | ||
407 | static void prune_dcache(int count, struct super_block *sb) | 437 | static void prune_dcache(int count, struct super_block *sb, int prune_parents) |
408 | { | 438 | { |
409 | spin_lock(&dcache_lock); | 439 | spin_lock(&dcache_lock); |
410 | for (; count ; count--) { | 440 | for (; count ; count--) { |
@@ -464,7 +494,7 @@ static void prune_dcache(int count, struct super_block *sb) | |||
464 | * without taking the s_umount lock (I already hold it). | 494 | * without taking the s_umount lock (I already hold it). |
465 | */ | 495 | */ |
466 | if (sb && dentry->d_sb == sb) { | 496 | if (sb && dentry->d_sb == sb) { |
467 | prune_one_dentry(dentry); | 497 | prune_one_dentry(dentry, prune_parents); |
468 | continue; | 498 | continue; |
469 | } | 499 | } |
470 | /* | 500 | /* |
@@ -479,7 +509,7 @@ static void prune_dcache(int count, struct super_block *sb) | |||
479 | s_umount = &dentry->d_sb->s_umount; | 509 | s_umount = &dentry->d_sb->s_umount; |
480 | if (down_read_trylock(s_umount)) { | 510 | if (down_read_trylock(s_umount)) { |
481 | if (dentry->d_sb->s_root != NULL) { | 511 | if (dentry->d_sb->s_root != NULL) { |
482 | prune_one_dentry(dentry); | 512 | prune_one_dentry(dentry, prune_parents); |
483 | up_read(s_umount); | 513 | up_read(s_umount); |
484 | continue; | 514 | continue; |
485 | } | 515 | } |
@@ -550,7 +580,7 @@ repeat: | |||
550 | spin_unlock(&dentry->d_lock); | 580 | spin_unlock(&dentry->d_lock); |
551 | continue; | 581 | continue; |
552 | } | 582 | } |
553 | prune_one_dentry(dentry); | 583 | prune_one_dentry(dentry, 1); |
554 | cond_resched_lock(&dcache_lock); | 584 | cond_resched_lock(&dcache_lock); |
555 | goto repeat; | 585 | goto repeat; |
556 | } | 586 | } |
@@ -829,7 +859,7 @@ void shrink_dcache_parent(struct dentry * parent) | |||
829 | int found; | 859 | int found; |
830 | 860 | ||
831 | while ((found = select_parent(parent)) != 0) | 861 | while ((found = select_parent(parent)) != 0) |
832 | prune_dcache(found, parent->d_sb); | 862 | prune_dcache(found, parent->d_sb, 1); |
833 | } | 863 | } |
834 | 864 | ||
835 | /* | 865 | /* |
@@ -849,7 +879,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask) | |||
849 | if (nr) { | 879 | if (nr) { |
850 | if (!(gfp_mask & __GFP_FS)) | 880 | if (!(gfp_mask & __GFP_FS)) |
851 | return -1; | 881 | return -1; |
852 | prune_dcache(nr, NULL); | 882 | prune_dcache(nr, NULL, 0); |
853 | } | 883 | } |
854 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 884 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; |
855 | } | 885 | } |