author		Linus Torvalds <torvalds@linux-foundation.org>	2014-05-30 12:52:55 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-05-30 12:52:55 -0400
commit		6f6111e4a73d0f6370eb8be4f8e4523210b6a67d (patch)
tree		18348a2d6793d637f913e25adca67287fdab24d0 /fs
parent		fe45736f4134b9656c656ac5e15b915192f2704a (diff)
parent		8cbf74da435d1bd13dbb790f94c7ff67b2fb6af4 (diff)
Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs dcache livelock fix from Al Viro:
"Fixes for livelocks in shrink_dentry_list() introduced by fixes to
shrink list corruption; the root cause was that trylock of parent's
->d_lock could be disrupted by d_walk() happening on other CPUs,
resulting in shrink_dentry_list() making no progress *and* the same
d_walk() being called again and again for as long as
shrink_dentry_list() doesn't get past that mess.
The solution is to have shrink_dentry_list() treat that trylock
failure not as 'try to do the same thing again', but 'lock them in the
right order'"
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
dentry_kill() doesn't need the second argument now
dealing with the rest of shrink_dentry_list() livelock
shrink_dentry_list(): take parent's ->d_lock earlier
expand dentry_kill(dentry, 0) in shrink_dentry_list()
split dentry_kill()
lift the "already marked killed" case into shrink_dentry_list()
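
The "lock them in the right order" fallback described in the pull message is what the new lock_parent() helper in this series implements: when the opportunistic trylock of the parent's ->d_lock fails, drop the child's lock, take the parent's lock first (the canonical order), re-take the child, and verify that the parent pointer did not change in between. Below is a minimal userspace sketch of that pattern using pthreads rather than the kernel's spinlocks; struct node and its fields are illustrative stand-ins, not the dcache types, and the kernel's RCU protection of the unlocked window is reduced to a comment.

#include <pthread.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;
	struct node *parent;		/* a root points to itself */
};

/*
 * Called with n->lock held; returns with n->lock held and, unless n is
 * a root, the parent's lock held too, taken in the safe order.
 * The kernel's lock_parent() additionally runs the slow path under
 * rcu_read_lock() and reads d_parent with ACCESS_ONCE() so the parent
 * cannot be freed while no locks are held; that is elided here.
 */
static struct node *lock_parent(struct node *n)
{
	struct node *parent = n->parent;

	if (parent == n)			/* root: nothing to lock */
		return NULL;
	if (pthread_mutex_trylock(&parent->lock) == 0)
		return parent;			/* fast path: no contention */

	/* Slow path: rather than retrying the same trylock over and
	 * over (the old livelock), drop the child's lock and acquire
	 * both locks in parent-then-child order. */
	pthread_mutex_unlock(&n->lock);
again:
	parent = n->parent;
	pthread_mutex_lock(&parent->lock);
	/* Parent pointers change only under the parent's lock, so this
	 * re-check under parent->lock catches concurrent reparenting. */
	if (parent != n->parent) {
		pthread_mutex_unlock(&parent->lock);
		goto again;
	}
	if (parent != n)
		pthread_mutex_lock(&n->lock);	/* safe: parent held first */
	else
		parent = NULL;			/* n became a root; its lock
						 * is already held via parent */
	return parent;
}

The re-check is what makes dropping the child's lock safe: the parent pointer can only change while the parent's lock is held, so observing parent == n->parent under that lock proves the right node was locked in the right order. Progress follows because a plain lock acquisition, unlike the endlessly retried trylock, eventually succeeds even with d_walk() running on other CPUs.
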
Diffstat (limited to 'fs')
-rw-r--r--	fs/dcache.c	153
1 file changed, 107 insertions, 46 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 42ae01eefc07..bce851dc03ef 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -441,42 +441,12 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
-/*
- * Finish off a dentry we've decided to kill.
- * dentry->d_lock must be held, returns with it unlocked.
- * If ref is non-zero, then decrement the refcount too.
- * Returns dentry requiring refcount drop, or NULL if we're done.
- */
-static struct dentry *
-dentry_kill(struct dentry *dentry, int unlock_on_failure)
-	__releases(dentry->d_lock)
+static void __dentry_kill(struct dentry *dentry)
 {
-	struct inode *inode;
 	struct dentry *parent = NULL;
 	bool can_free = true;
-
-	if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
-		can_free = dentry->d_flags & DCACHE_MAY_FREE;
-		spin_unlock(&dentry->d_lock);
-		goto out;
-	}
-
-	inode = dentry->d_inode;
-	if (inode && !spin_trylock(&inode->i_lock)) {
-relock:
-		if (unlock_on_failure) {
-			spin_unlock(&dentry->d_lock);
-			cpu_relax();
-		}
-		return dentry; /* try again with same dentry */
-	}
 	if (!IS_ROOT(dentry))
 		parent = dentry->d_parent;
-	if (parent && !spin_trylock(&parent->d_lock)) {
-		if (inode)
-			spin_unlock(&inode->i_lock);
-		goto relock;
-	}
 
 	/*
 	 * The dentry is now unrecoverably dead to the world.
@@ -520,9 +490,72 @@ relock:
 		can_free = false;
 	}
 	spin_unlock(&dentry->d_lock);
-out:
 	if (likely(can_free))
 		dentry_free(dentry);
+}
+
+/*
+ * Finish off a dentry we've decided to kill.
+ * dentry->d_lock must be held, returns with it unlocked.
+ * If ref is non-zero, then decrement the refcount too.
+ * Returns dentry requiring refcount drop, or NULL if we're done.
+ */
+static struct dentry *dentry_kill(struct dentry *dentry)
+	__releases(dentry->d_lock)
+{
+	struct inode *inode = dentry->d_inode;
+	struct dentry *parent = NULL;
+
+	if (inode && unlikely(!spin_trylock(&inode->i_lock)))
+		goto failed;
+
+	if (!IS_ROOT(dentry)) {
+		parent = dentry->d_parent;
+		if (unlikely(!spin_trylock(&parent->d_lock))) {
+			if (inode)
+				spin_unlock(&inode->i_lock);
+			goto failed;
+		}
+	}
+
+	__dentry_kill(dentry);
+	return parent;
+
+failed:
+	spin_unlock(&dentry->d_lock);
+	cpu_relax();
+	return dentry; /* try again with same dentry */
+}
+
+static inline struct dentry *lock_parent(struct dentry *dentry)
+{
+	struct dentry *parent = dentry->d_parent;
+	if (IS_ROOT(dentry))
+		return NULL;
+	if (likely(spin_trylock(&parent->d_lock)))
+		return parent;
+	spin_unlock(&dentry->d_lock);
+	rcu_read_lock();
+again:
+	parent = ACCESS_ONCE(dentry->d_parent);
+	spin_lock(&parent->d_lock);
+	/*
+	 * We can't blindly lock dentry until we are sure
+	 * that we won't violate the locking order.
+	 * Any changes of dentry->d_parent must have
+	 * been done with parent->d_lock held, so
+	 * spin_lock() above is enough of a barrier
+	 * for checking if it's still our child.
+	 */
+	if (unlikely(parent != dentry->d_parent)) {
+		spin_unlock(&parent->d_lock);
+		goto again;
+	}
+	rcu_read_unlock();
+	if (parent != dentry)
+		spin_lock(&dentry->d_lock);
+	else
+		parent = NULL;
 	return parent;
 }
 
@@ -579,7 +612,7 @@ repeat:
 	return;
 
 kill_it:
-	dentry = dentry_kill(dentry, 1);
+	dentry = dentry_kill(dentry);
 	if (dentry)
 		goto repeat;
 }
@@ -797,8 +830,11 @@ static void shrink_dentry_list(struct list_head *list)
 	struct dentry *dentry, *parent;
 
 	while (!list_empty(list)) {
+		struct inode *inode;
 		dentry = list_entry(list->prev, struct dentry, d_lru);
 		spin_lock(&dentry->d_lock);
+		parent = lock_parent(dentry);
+
 		/*
 		 * The dispose list is isolated and dentries are not accounted
 		 * to the LRU here, so we can simply remove it from the list
@@ -812,26 +848,33 @@ static void shrink_dentry_list(struct list_head *list)
 		 */
 		if ((int)dentry->d_lockref.count > 0) {
 			spin_unlock(&dentry->d_lock);
+			if (parent)
+				spin_unlock(&parent->d_lock);
 			continue;
 		}
 
-		parent = dentry_kill(dentry, 0);
-		/*
-		 * If dentry_kill returns NULL, we have nothing more to do.
-		 */
-		if (!parent)
+
+		if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) {
+			bool can_free = dentry->d_flags & DCACHE_MAY_FREE;
+			spin_unlock(&dentry->d_lock);
+			if (parent)
+				spin_unlock(&parent->d_lock);
+			if (can_free)
+				dentry_free(dentry);
 			continue;
+		}
 
-		if (unlikely(parent == dentry)) {
-			/*
-			 * trylocks have failed and d_lock has been held the
-			 * whole time, so it could not have been added to any
-			 * other lists. Just add it back to the shrink list.
-			 */
+		inode = dentry->d_inode;
+		if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
 			d_shrink_add(dentry, list);
 			spin_unlock(&dentry->d_lock);
+			if (parent)
+				spin_unlock(&parent->d_lock);
 			continue;
 		}
+
+		__dentry_kill(dentry);
+
 		/*
 		 * We need to prune ancestors too. This is necessary to prevent
 		 * quadratic behavior of shrink_dcache_parent(), but is also
@@ -839,8 +882,26 @@ static void shrink_dentry_list(struct list_head *list)
 		 * fragmentation.
 		 */
 		dentry = parent;
-		while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
-			dentry = dentry_kill(dentry, 1);
+		while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) {
+			parent = lock_parent(dentry);
+			if (dentry->d_lockref.count != 1) {
+				dentry->d_lockref.count--;
+				spin_unlock(&dentry->d_lock);
+				if (parent)
+					spin_unlock(&parent->d_lock);
+				break;
+			}
+			inode = dentry->d_inode;	/* can't be NULL */
+			if (unlikely(!spin_trylock(&inode->i_lock))) {
+				spin_unlock(&dentry->d_lock);
+				if (parent)
+					spin_unlock(&parent->d_lock);
+				cpu_relax();
+				continue;
+			}
+			__dentry_kill(dentry);
+			dentry = parent;
+		}
 	}
 }
 