diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2014-05-29 08:54:52 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2014-05-30 11:03:21 -0400 |
commit | 046b961b45f93a92e4c70525a12f3d378bced130 (patch) | |
tree | 25c7bf216122f3c49c5ed0516b6b4b260316d85d | |
parent | ff2fde9929feb2aef45377ce56b8b12df85dda69 (diff) |
shrink_dentry_list(): take parent's ->d_lock earlier
The cause of livelocks there is that we are taking ->d_lock on
dentry and its parent in the wrong order, forcing us to use
trylock on the parent's one. d_walk() takes them in the right
order, and unfortunately it's not hard to create a situation
when shrink_dentry_list() can't make progress since trylock
keeps failing, and shrink_dcache_parent() or check_submounts_and_drop()
keeps calling d_walk() disrupting the very shrink_dentry_list() it's
waiting for.
Solution is straightforward - if that trylock fails, let's unlock
the dentry itself and take locks in the right order. We need to
stabilize ->d_parent without holding ->d_lock, but that's doable
using RCU. And we'd better do that in the very beginning of the
loop in shrink_dentry_list(), since the checks on refcount, etc.
would need to be redone anyway.
That deals with a half of the problem - killing dentries on the
shrink list itself. Another one (dropping their parents) is
in the next commit.
locking parent is interesting - it would be easy to do rcu_read_lock(),
lock whatever we think is a parent, lock dentry itself and check
if the parent is still the right one. Except that we need to check
that *before* locking the dentry, or we are risking taking ->d_lock
out of order. Fortunately, once the D1 is locked, we can check if
D2->d_parent is equal to D1 without the need to lock D2; D2->d_parent
can start or stop pointing to D1 only under D1->d_lock, so taking
D1->d_lock is enough. In other words, the right solution is
rcu_read_lock/lock what looks like parent right now/check if it's
still our parent/rcu_read_unlock/lock the child.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/dcache.c | 53 |
1 files changed, 41 insertions, 12 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index c23f78a9d156..d54a99baf4f3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -530,6 +530,38 @@ failed: | |||
530 | return dentry; /* try again with same dentry */ | 530 | return dentry; /* try again with same dentry */ |
531 | } | 531 | } |
532 | 532 | ||
533 | static inline struct dentry *lock_parent(struct dentry *dentry) | ||
534 | { | ||
535 | struct dentry *parent = dentry->d_parent; | ||
536 | if (IS_ROOT(dentry)) | ||
537 | return NULL; | ||
538 | if (likely(spin_trylock(&parent->d_lock))) | ||
539 | return parent; | ||
540 | spin_unlock(&dentry->d_lock); | ||
541 | rcu_read_lock(); | ||
542 | again: | ||
543 | parent = ACCESS_ONCE(dentry->d_parent); | ||
544 | spin_lock(&parent->d_lock); | ||
545 | /* | ||
546 | * We can't blindly lock dentry until we are sure | ||
547 | * that we won't violate the locking order. | ||
548 | * Any changes of dentry->d_parent must have | ||
549 | * been done with parent->d_lock held, so | ||
550 | * spin_lock() above is enough of a barrier | ||
551 | * for checking if it's still our child. | ||
552 | */ | ||
553 | if (unlikely(parent != dentry->d_parent)) { | ||
554 | spin_unlock(&parent->d_lock); | ||
555 | goto again; | ||
556 | } | ||
557 | rcu_read_unlock(); | ||
558 | if (parent != dentry) | ||
559 | spin_lock(&dentry->d_lock); | ||
560 | else | ||
561 | parent = NULL; | ||
562 | return parent; | ||
563 | } | ||
564 | |||
533 | /* | 565 | /* |
534 | * This is dput | 566 | * This is dput |
535 | * | 567 | * |
@@ -804,6 +836,8 @@ static void shrink_dentry_list(struct list_head *list) | |||
804 | struct inode *inode; | 836 | struct inode *inode; |
805 | dentry = list_entry(list->prev, struct dentry, d_lru); | 837 | dentry = list_entry(list->prev, struct dentry, d_lru); |
806 | spin_lock(&dentry->d_lock); | 838 | spin_lock(&dentry->d_lock); |
839 | parent = lock_parent(dentry); | ||
840 | |||
807 | /* | 841 | /* |
808 | * The dispose list is isolated and dentries are not accounted | 842 | * The dispose list is isolated and dentries are not accounted |
809 | * to the LRU here, so we can simply remove it from the list | 843 | * to the LRU here, so we can simply remove it from the list |
@@ -817,6 +851,8 @@ static void shrink_dentry_list(struct list_head *list) | |||
817 | */ | 851 | */ |
818 | if ((int)dentry->d_lockref.count > 0) { | 852 | if ((int)dentry->d_lockref.count > 0) { |
819 | spin_unlock(&dentry->d_lock); | 853 | spin_unlock(&dentry->d_lock); |
854 | if (parent) | ||
855 | spin_unlock(&parent->d_lock); | ||
820 | continue; | 856 | continue; |
821 | } | 857 | } |
822 | 858 | ||
@@ -824,6 +860,8 @@ static void shrink_dentry_list(struct list_head *list) | |||
824 | if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { | 860 | if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { |
825 | bool can_free = dentry->d_flags & DCACHE_MAY_FREE; | 861 | bool can_free = dentry->d_flags & DCACHE_MAY_FREE; |
826 | spin_unlock(&dentry->d_lock); | 862 | spin_unlock(&dentry->d_lock); |
863 | if (parent) | ||
864 | spin_unlock(&parent->d_lock); | ||
827 | if (can_free) | 865 | if (can_free) |
828 | dentry_free(dentry); | 866 | dentry_free(dentry); |
829 | continue; | 867 | continue; |
@@ -833,22 +871,13 @@ static void shrink_dentry_list(struct list_head *list) | |||
833 | if (inode && unlikely(!spin_trylock(&inode->i_lock))) { | 871 | if (inode && unlikely(!spin_trylock(&inode->i_lock))) { |
834 | d_shrink_add(dentry, list); | 872 | d_shrink_add(dentry, list); |
835 | spin_unlock(&dentry->d_lock); | 873 | spin_unlock(&dentry->d_lock); |
874 | if (parent) | ||
875 | spin_unlock(&parent->d_lock); | ||
836 | continue; | 876 | continue; |
837 | } | 877 | } |
838 | 878 | ||
839 | parent = NULL; | ||
840 | if (!IS_ROOT(dentry)) { | ||
841 | parent = dentry->d_parent; | ||
842 | if (unlikely(!spin_trylock(&parent->d_lock))) { | ||
843 | if (inode) | ||
844 | spin_unlock(&inode->i_lock); | ||
845 | d_shrink_add(dentry, list); | ||
846 | spin_unlock(&dentry->d_lock); | ||
847 | continue; | ||
848 | } | ||
849 | } | ||
850 | |||
851 | __dentry_kill(dentry); | 879 | __dentry_kill(dentry); |
880 | |||
852 | /* | 881 | /* |
853 | * We need to prune ancestors too. This is necessary to prevent | 882 | * We need to prune ancestors too. This is necessary to prevent |
854 | * quadratic behavior of shrink_dcache_parent(), but is also | 883 | * quadratic behavior of shrink_dcache_parent(), but is also |