diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-30 12:52:55 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-05-30 12:52:55 -0400 |
| commit | 6f6111e4a73d0f6370eb8be4f8e4523210b6a67d (patch) | |
| tree | 18348a2d6793d637f913e25adca67287fdab24d0 | |
| parent | fe45736f4134b9656c656ac5e15b915192f2704a (diff) | |
| parent | 8cbf74da435d1bd13dbb790f94c7ff67b2fb6af4 (diff) | |
Merge branch 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs dcache livelock fix from Al Viro:
"Fixes for livelocks in shrink_dentry_list() introduced by fixes to
shrink list corruption; the root cause was that trylock of parent's
->d_lock could be disrupted by d_walk() happening on other CPUs,
resulting in shrink_dentry_list() making no progress *and* the same
d_walk() being called again and again for as long as
shrink_dentry_list() doesn't get past that mess.
The solution is to have shrink_dentry_list() treat that trylock
failure not as 'try to do the same thing again', but 'lock them in the
right order'"
* 'for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
dentry_kill() doesn't need the second argument now
dealing with the rest of shrink_dentry_list() livelock
shrink_dentry_list(): take parent's ->d_lock earlier
expand dentry_kill(dentry, 0) in shrink_dentry_list()
split dentry_kill()
lift the "already marked killed" case into shrink_dentry_list()
| -rw-r--r-- | fs/dcache.c | 153 |
1 files changed, 107 insertions, 46 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index 42ae01eefc07..bce851dc03ef 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -441,42 +441,12 @@ void d_drop(struct dentry *dentry) | |||
| 441 | } | 441 | } |
| 442 | EXPORT_SYMBOL(d_drop); | 442 | EXPORT_SYMBOL(d_drop); |
| 443 | 443 | ||
| 444 | /* | 444 | static void __dentry_kill(struct dentry *dentry) |
| 445 | * Finish off a dentry we've decided to kill. | ||
| 446 | * dentry->d_lock must be held, returns with it unlocked. | ||
| 447 | * If ref is non-zero, then decrement the refcount too. | ||
| 448 | * Returns dentry requiring refcount drop, or NULL if we're done. | ||
| 449 | */ | ||
| 450 | static struct dentry * | ||
| 451 | dentry_kill(struct dentry *dentry, int unlock_on_failure) | ||
| 452 | __releases(dentry->d_lock) | ||
| 453 | { | 445 | { |
| 454 | struct inode *inode; | ||
| 455 | struct dentry *parent = NULL; | 446 | struct dentry *parent = NULL; |
| 456 | bool can_free = true; | 447 | bool can_free = true; |
| 457 | |||
| 458 | if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { | ||
| 459 | can_free = dentry->d_flags & DCACHE_MAY_FREE; | ||
| 460 | spin_unlock(&dentry->d_lock); | ||
| 461 | goto out; | ||
| 462 | } | ||
| 463 | |||
| 464 | inode = dentry->d_inode; | ||
| 465 | if (inode && !spin_trylock(&inode->i_lock)) { | ||
| 466 | relock: | ||
| 467 | if (unlock_on_failure) { | ||
| 468 | spin_unlock(&dentry->d_lock); | ||
| 469 | cpu_relax(); | ||
| 470 | } | ||
| 471 | return dentry; /* try again with same dentry */ | ||
| 472 | } | ||
| 473 | if (!IS_ROOT(dentry)) | 448 | if (!IS_ROOT(dentry)) |
| 474 | parent = dentry->d_parent; | 449 | parent = dentry->d_parent; |
| 475 | if (parent && !spin_trylock(&parent->d_lock)) { | ||
| 476 | if (inode) | ||
| 477 | spin_unlock(&inode->i_lock); | ||
| 478 | goto relock; | ||
| 479 | } | ||
| 480 | 450 | ||
| 481 | /* | 451 | /* |
| 482 | * The dentry is now unrecoverably dead to the world. | 452 | * The dentry is now unrecoverably dead to the world. |
| @@ -520,9 +490,72 @@ relock: | |||
| 520 | can_free = false; | 490 | can_free = false; |
| 521 | } | 491 | } |
| 522 | spin_unlock(&dentry->d_lock); | 492 | spin_unlock(&dentry->d_lock); |
| 523 | out: | ||
| 524 | if (likely(can_free)) | 493 | if (likely(can_free)) |
| 525 | dentry_free(dentry); | 494 | dentry_free(dentry); |
| 495 | } | ||
| 496 | |||
| 497 | /* | ||
| 498 | * Finish off a dentry we've decided to kill. | ||
| 499 | * dentry->d_lock must be held, returns with it unlocked. | ||
| 500 | * If ref is non-zero, then decrement the refcount too. | ||
| 501 | * Returns dentry requiring refcount drop, or NULL if we're done. | ||
| 502 | */ | ||
| 503 | static struct dentry *dentry_kill(struct dentry *dentry) | ||
| 504 | __releases(dentry->d_lock) | ||
| 505 | { | ||
| 506 | struct inode *inode = dentry->d_inode; | ||
| 507 | struct dentry *parent = NULL; | ||
| 508 | |||
| 509 | if (inode && unlikely(!spin_trylock(&inode->i_lock))) | ||
| 510 | goto failed; | ||
| 511 | |||
| 512 | if (!IS_ROOT(dentry)) { | ||
| 513 | parent = dentry->d_parent; | ||
| 514 | if (unlikely(!spin_trylock(&parent->d_lock))) { | ||
| 515 | if (inode) | ||
| 516 | spin_unlock(&inode->i_lock); | ||
| 517 | goto failed; | ||
| 518 | } | ||
| 519 | } | ||
| 520 | |||
| 521 | __dentry_kill(dentry); | ||
| 522 | return parent; | ||
| 523 | |||
| 524 | failed: | ||
| 525 | spin_unlock(&dentry->d_lock); | ||
| 526 | cpu_relax(); | ||
| 527 | return dentry; /* try again with same dentry */ | ||
| 528 | } | ||
| 529 | |||
| 530 | static inline struct dentry *lock_parent(struct dentry *dentry) | ||
| 531 | { | ||
| 532 | struct dentry *parent = dentry->d_parent; | ||
| 533 | if (IS_ROOT(dentry)) | ||
| 534 | return NULL; | ||
| 535 | if (likely(spin_trylock(&parent->d_lock))) | ||
| 536 | return parent; | ||
| 537 | spin_unlock(&dentry->d_lock); | ||
| 538 | rcu_read_lock(); | ||
| 539 | again: | ||
| 540 | parent = ACCESS_ONCE(dentry->d_parent); | ||
| 541 | spin_lock(&parent->d_lock); | ||
| 542 | /* | ||
| 543 | * We can't blindly lock dentry until we are sure | ||
| 544 | * that we won't violate the locking order. | ||
| 545 | * Any changes of dentry->d_parent must have | ||
| 546 | * been done with parent->d_lock held, so | ||
| 547 | * spin_lock() above is enough of a barrier | ||
| 548 | * for checking if it's still our child. | ||
| 549 | */ | ||
| 550 | if (unlikely(parent != dentry->d_parent)) { | ||
| 551 | spin_unlock(&parent->d_lock); | ||
| 552 | goto again; | ||
| 553 | } | ||
| 554 | rcu_read_unlock(); | ||
| 555 | if (parent != dentry) | ||
| 556 | spin_lock(&dentry->d_lock); | ||
| 557 | else | ||
| 558 | parent = NULL; | ||
| 526 | return parent; | 559 | return parent; |
| 527 | } | 560 | } |
| 528 | 561 | ||
| @@ -579,7 +612,7 @@ repeat: | |||
| 579 | return; | 612 | return; |
| 580 | 613 | ||
| 581 | kill_it: | 614 | kill_it: |
| 582 | dentry = dentry_kill(dentry, 1); | 615 | dentry = dentry_kill(dentry); |
| 583 | if (dentry) | 616 | if (dentry) |
| 584 | goto repeat; | 617 | goto repeat; |
| 585 | } | 618 | } |
| @@ -797,8 +830,11 @@ static void shrink_dentry_list(struct list_head *list) | |||
| 797 | struct dentry *dentry, *parent; | 830 | struct dentry *dentry, *parent; |
| 798 | 831 | ||
| 799 | while (!list_empty(list)) { | 832 | while (!list_empty(list)) { |
| 833 | struct inode *inode; | ||
| 800 | dentry = list_entry(list->prev, struct dentry, d_lru); | 834 | dentry = list_entry(list->prev, struct dentry, d_lru); |
| 801 | spin_lock(&dentry->d_lock); | 835 | spin_lock(&dentry->d_lock); |
| 836 | parent = lock_parent(dentry); | ||
| 837 | |||
| 802 | /* | 838 | /* |
| 803 | * The dispose list is isolated and dentries are not accounted | 839 | * The dispose list is isolated and dentries are not accounted |
| 804 | * to the LRU here, so we can simply remove it from the list | 840 | * to the LRU here, so we can simply remove it from the list |
| @@ -812,26 +848,33 @@ static void shrink_dentry_list(struct list_head *list) | |||
| 812 | */ | 848 | */ |
| 813 | if ((int)dentry->d_lockref.count > 0) { | 849 | if ((int)dentry->d_lockref.count > 0) { |
| 814 | spin_unlock(&dentry->d_lock); | 850 | spin_unlock(&dentry->d_lock); |
| 851 | if (parent) | ||
| 852 | spin_unlock(&parent->d_lock); | ||
| 815 | continue; | 853 | continue; |
| 816 | } | 854 | } |
| 817 | 855 | ||
| 818 | parent = dentry_kill(dentry, 0); | 856 | |
| 819 | /* | 857 | if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { |
| 820 | * If dentry_kill returns NULL, we have nothing more to do. | 858 | bool can_free = dentry->d_flags & DCACHE_MAY_FREE; |
| 821 | */ | 859 | spin_unlock(&dentry->d_lock); |
| 822 | if (!parent) | 860 | if (parent) |
| 861 | spin_unlock(&parent->d_lock); | ||
| 862 | if (can_free) | ||
| 863 | dentry_free(dentry); | ||
| 823 | continue; | 864 | continue; |
| 865 | } | ||
| 824 | 866 | ||
| 825 | if (unlikely(parent == dentry)) { | 867 | inode = dentry->d_inode; |
| 826 | /* | 868 | if (inode && unlikely(!spin_trylock(&inode->i_lock))) { |
| 827 | * trylocks have failed and d_lock has been held the | ||
| 828 | * whole time, so it could not have been added to any | ||
| 829 | * other lists. Just add it back to the shrink list. | ||
| 830 | */ | ||
| 831 | d_shrink_add(dentry, list); | 869 | d_shrink_add(dentry, list); |
| 832 | spin_unlock(&dentry->d_lock); | 870 | spin_unlock(&dentry->d_lock); |
| 871 | if (parent) | ||
| 872 | spin_unlock(&parent->d_lock); | ||
| 833 | continue; | 873 | continue; |
| 834 | } | 874 | } |
| 875 | |||
| 876 | __dentry_kill(dentry); | ||
| 877 | |||
| 835 | /* | 878 | /* |
| 836 | * We need to prune ancestors too. This is necessary to prevent | 879 | * We need to prune ancestors too. This is necessary to prevent |
| 837 | * quadratic behavior of shrink_dcache_parent(), but is also | 880 | * quadratic behavior of shrink_dcache_parent(), but is also |
| @@ -839,8 +882,26 @@ static void shrink_dentry_list(struct list_head *list) | |||
| 839 | * fragmentation. | 882 | * fragmentation. |
| 840 | */ | 883 | */ |
| 841 | dentry = parent; | 884 | dentry = parent; |
| 842 | while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) | 885 | while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { |
| 843 | dentry = dentry_kill(dentry, 1); | 886 | parent = lock_parent(dentry); |
| 887 | if (dentry->d_lockref.count != 1) { | ||
| 888 | dentry->d_lockref.count--; | ||
| 889 | spin_unlock(&dentry->d_lock); | ||
| 890 | if (parent) | ||
| 891 | spin_unlock(&parent->d_lock); | ||
| 892 | break; | ||
| 893 | } | ||
| 894 | inode = dentry->d_inode; /* can't be NULL */ | ||
| 895 | if (unlikely(!spin_trylock(&inode->i_lock))) { | ||
| 896 | spin_unlock(&dentry->d_lock); | ||
| 897 | if (parent) | ||
| 898 | spin_unlock(&parent->d_lock); | ||
| 899 | cpu_relax(); | ||
| 900 | continue; | ||
| 901 | } | ||
| 902 | __dentry_kill(dentry); | ||
| 903 | dentry = parent; | ||
| 904 | } | ||
| 844 | } | 905 | } |
| 845 | } | 906 | } |
| 846 | 907 | ||
