diff options
author | Dave Chinner <dchinner@redhat.com> | 2013-08-27 20:17:55 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2013-09-10 18:56:30 -0400 |
commit | dd1f6b2e43a53ee58eb87d5e623cf44e277d005d (patch) | |
tree | dbc3939af5992851c3e9e2978191e0eeab52a11f /fs | |
parent | 19156840e33a23eeb1a749c0f991dab6588b077d (diff) |
dcache: remove dentries from LRU before putting on dispose list
One of the big problems with modifying the way the dcache shrinker and LRU
implementation works is that the LRU is abused in several ways. One of
these is shrink_dentry_list().
Basically, we can move a dentry off the LRU onto a different list without
doing any accounting changes, and then use dentry_lru_prune() to remove it
from what-ever list it is now on to do the LRU accounting at that point.
This makes it -really hard- to change the LRU implementation. The use of
the per-sb LRU lock serialises movement of the dentries between the
different lists and the removal of them, and this is the only reason that
it works. If we want to break up the dentry LRU lock and lists into, say,
per-node lists, we remove the only serialisation that allows this lru
list/dispose list abuse to work.
To make this work effectively, the dispose list has to be isolated from
the LRU list - dentries have to be removed from the LRU *before* being
placed on the dispose list. This means that the LRU accounting and
isolation is completed before disposal is started, and that means we can
change the LRU implementation freely in future.
This means that dentries *must* be marked with DCACHE_SHRINK_LIST when
they are placed on the dispose list so that we don't think that parent
dentries found in try_prune_one_dentry() are on the LRU when the are
actually on the dispose list. This would result in accounting the dentry
to the LRU a second time. Hence dentry_lru_del() has to handle the
DCACHE_SHRINK_LIST case
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: Arve Hjønnevåg <arve@android.com>
Cc: Carlos Maiolino <cmaiolino@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: David Rientjes <rientjes@google.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: J. Bruce Fields <bfields@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Kent Overstreet <koverstreet@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Steven Whitehouse <swhiteho@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dcache.c | 99 |
1 files changed, 78 insertions, 21 deletions
diff --git a/fs/dcache.c b/fs/dcache.c index e989ecb44a65..509b49410943 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -356,7 +356,7 @@ static void dentry_unlink_inode(struct dentry * dentry) | |||
356 | } | 356 | } |
357 | 357 | ||
358 | /* | 358 | /* |
359 | * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held. | 359 | * dentry_lru_(add|del|move_list) must be called with d_lock held. |
360 | */ | 360 | */ |
361 | static void dentry_lru_add(struct dentry *dentry) | 361 | static void dentry_lru_add(struct dentry *dentry) |
362 | { | 362 | { |
@@ -373,16 +373,26 @@ static void dentry_lru_add(struct dentry *dentry) | |||
373 | static void __dentry_lru_del(struct dentry *dentry) | 373 | static void __dentry_lru_del(struct dentry *dentry) |
374 | { | 374 | { |
375 | list_del_init(&dentry->d_lru); | 375 | list_del_init(&dentry->d_lru); |
376 | dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST); | 376 | dentry->d_flags &= ~DCACHE_LRU_LIST; |
377 | dentry->d_sb->s_nr_dentry_unused--; | 377 | dentry->d_sb->s_nr_dentry_unused--; |
378 | this_cpu_dec(nr_dentry_unused); | 378 | this_cpu_dec(nr_dentry_unused); |
379 | } | 379 | } |
380 | 380 | ||
381 | /* | 381 | /* |
382 | * Remove a dentry with references from the LRU. | 382 | * Remove a dentry with references from the LRU. |
383 | * | ||
384 | * If we are on the shrink list, then we can get to try_prune_one_dentry() and | ||
385 | * lose our last reference through the parent walk. In this case, we need to | ||
386 | * remove ourselves from the shrink list, not the LRU. | ||
383 | */ | 387 | */ |
384 | static void dentry_lru_del(struct dentry *dentry) | 388 | static void dentry_lru_del(struct dentry *dentry) |
385 | { | 389 | { |
390 | if (dentry->d_flags & DCACHE_SHRINK_LIST) { | ||
391 | list_del_init(&dentry->d_lru); | ||
392 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | ||
393 | return; | ||
394 | } | ||
395 | |||
386 | if (!list_empty(&dentry->d_lru)) { | 396 | if (!list_empty(&dentry->d_lru)) { |
387 | spin_lock(&dentry->d_sb->s_dentry_lru_lock); | 397 | spin_lock(&dentry->d_sb->s_dentry_lru_lock); |
388 | __dentry_lru_del(dentry); | 398 | __dentry_lru_del(dentry); |
@@ -392,14 +402,16 @@ static void dentry_lru_del(struct dentry *dentry) | |||
392 | 402 | ||
393 | static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) | 403 | static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list) |
394 | { | 404 | { |
405 | BUG_ON(dentry->d_flags & DCACHE_SHRINK_LIST); | ||
406 | |||
395 | spin_lock(&dentry->d_sb->s_dentry_lru_lock); | 407 | spin_lock(&dentry->d_sb->s_dentry_lru_lock); |
396 | if (list_empty(&dentry->d_lru)) { | 408 | if (list_empty(&dentry->d_lru)) { |
397 | dentry->d_flags |= DCACHE_LRU_LIST; | 409 | dentry->d_flags |= DCACHE_LRU_LIST; |
398 | list_add_tail(&dentry->d_lru, list); | 410 | list_add_tail(&dentry->d_lru, list); |
399 | dentry->d_sb->s_nr_dentry_unused++; | ||
400 | this_cpu_inc(nr_dentry_unused); | ||
401 | } else { | 411 | } else { |
402 | list_move_tail(&dentry->d_lru, list); | 412 | list_move_tail(&dentry->d_lru, list); |
413 | dentry->d_sb->s_nr_dentry_unused--; | ||
414 | this_cpu_dec(nr_dentry_unused); | ||
403 | } | 415 | } |
404 | spin_unlock(&dentry->d_sb->s_dentry_lru_lock); | 416 | spin_unlock(&dentry->d_sb->s_dentry_lru_lock); |
405 | } | 417 | } |
@@ -497,7 +509,8 @@ EXPORT_SYMBOL(d_drop); | |||
497 | * If ref is non-zero, then decrement the refcount too. | 509 | * If ref is non-zero, then decrement the refcount too. |
498 | * Returns dentry requiring refcount drop, or NULL if we're done. | 510 | * Returns dentry requiring refcount drop, or NULL if we're done. |
499 | */ | 511 | */ |
500 | static inline struct dentry *dentry_kill(struct dentry *dentry) | 512 | static inline struct dentry * |
513 | dentry_kill(struct dentry *dentry, int unlock_on_failure) | ||
501 | __releases(dentry->d_lock) | 514 | __releases(dentry->d_lock) |
502 | { | 515 | { |
503 | struct inode *inode; | 516 | struct inode *inode; |
@@ -506,8 +519,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry) | |||
506 | inode = dentry->d_inode; | 519 | inode = dentry->d_inode; |
507 | if (inode && !spin_trylock(&inode->i_lock)) { | 520 | if (inode && !spin_trylock(&inode->i_lock)) { |
508 | relock: | 521 | relock: |
509 | spin_unlock(&dentry->d_lock); | 522 | if (unlock_on_failure) { |
510 | cpu_relax(); | 523 | spin_unlock(&dentry->d_lock); |
524 | cpu_relax(); | ||
525 | } | ||
511 | return dentry; /* try again with same dentry */ | 526 | return dentry; /* try again with same dentry */ |
512 | } | 527 | } |
513 | if (IS_ROOT(dentry)) | 528 | if (IS_ROOT(dentry)) |
@@ -590,7 +605,7 @@ repeat: | |||
590 | return; | 605 | return; |
591 | 606 | ||
592 | kill_it: | 607 | kill_it: |
593 | dentry = dentry_kill(dentry); | 608 | dentry = dentry_kill(dentry, 1); |
594 | if (dentry) | 609 | if (dentry) |
595 | goto repeat; | 610 | goto repeat; |
596 | } | 611 | } |
@@ -810,12 +825,12 @@ EXPORT_SYMBOL(d_prune_aliases); | |||
810 | * | 825 | * |
811 | * This may fail if locks cannot be acquired no problem, just try again. | 826 | * This may fail if locks cannot be acquired no problem, just try again. |
812 | */ | 827 | */ |
813 | static void try_prune_one_dentry(struct dentry *dentry) | 828 | static struct dentry * try_prune_one_dentry(struct dentry *dentry) |
814 | __releases(dentry->d_lock) | 829 | __releases(dentry->d_lock) |
815 | { | 830 | { |
816 | struct dentry *parent; | 831 | struct dentry *parent; |
817 | 832 | ||
818 | parent = dentry_kill(dentry); | 833 | parent = dentry_kill(dentry, 0); |
819 | /* | 834 | /* |
820 | * If dentry_kill returns NULL, we have nothing more to do. | 835 | * If dentry_kill returns NULL, we have nothing more to do. |
821 | * if it returns the same dentry, trylocks failed. In either | 836 | * if it returns the same dentry, trylocks failed. In either |
@@ -827,17 +842,18 @@ static void try_prune_one_dentry(struct dentry *dentry) | |||
827 | * fragmentation. | 842 | * fragmentation. |
828 | */ | 843 | */ |
829 | if (!parent) | 844 | if (!parent) |
830 | return; | 845 | return NULL; |
831 | if (parent == dentry) | 846 | if (parent == dentry) |
832 | return; | 847 | return dentry; |
833 | 848 | ||
834 | /* Prune ancestors. */ | 849 | /* Prune ancestors. */ |
835 | dentry = parent; | 850 | dentry = parent; |
836 | while (dentry) { | 851 | while (dentry) { |
837 | if (lockref_put_or_lock(&dentry->d_lockref)) | 852 | if (lockref_put_or_lock(&dentry->d_lockref)) |
838 | return; | 853 | return NULL; |
839 | dentry = dentry_kill(dentry); | 854 | dentry = dentry_kill(dentry, 1); |
840 | } | 855 | } |
856 | return NULL; | ||
841 | } | 857 | } |
842 | 858 | ||
843 | static void shrink_dentry_list(struct list_head *list) | 859 | static void shrink_dentry_list(struct list_head *list) |
@@ -856,21 +872,31 @@ static void shrink_dentry_list(struct list_head *list) | |||
856 | } | 872 | } |
857 | 873 | ||
858 | /* | 874 | /* |
875 | * The dispose list is isolated and dentries are not accounted | ||
876 | * to the LRU here, so we can simply remove it from the list | ||
877 | * here regardless of whether it is referenced or not. | ||
878 | */ | ||
879 | list_del_init(&dentry->d_lru); | ||
880 | dentry->d_flags &= ~DCACHE_SHRINK_LIST; | ||
881 | |||
882 | /* | ||
859 | * We found an inuse dentry which was not removed from | 883 | * We found an inuse dentry which was not removed from |
860 | * the LRU because of laziness during lookup. Do not free | 884 | * the LRU because of laziness during lookup. Do not free it. |
861 | * it - just keep it off the LRU list. | ||
862 | */ | 885 | */ |
863 | if (dentry->d_lockref.count) { | 886 | if (dentry->d_lockref.count) { |
864 | dentry_lru_del(dentry); | ||
865 | spin_unlock(&dentry->d_lock); | 887 | spin_unlock(&dentry->d_lock); |
866 | continue; | 888 | continue; |
867 | } | 889 | } |
868 | |||
869 | rcu_read_unlock(); | 890 | rcu_read_unlock(); |
870 | 891 | ||
871 | try_prune_one_dentry(dentry); | 892 | dentry = try_prune_one_dentry(dentry); |
872 | 893 | ||
873 | rcu_read_lock(); | 894 | rcu_read_lock(); |
895 | if (dentry) { | ||
896 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
897 | list_add(&dentry->d_lru, list); | ||
898 | spin_unlock(&dentry->d_lock); | ||
899 | } | ||
874 | } | 900 | } |
875 | rcu_read_unlock(); | 901 | rcu_read_unlock(); |
876 | } | 902 | } |
@@ -911,8 +937,10 @@ relock: | |||
911 | list_move(&dentry->d_lru, &referenced); | 937 | list_move(&dentry->d_lru, &referenced); |
912 | spin_unlock(&dentry->d_lock); | 938 | spin_unlock(&dentry->d_lock); |
913 | } else { | 939 | } else { |
914 | list_move_tail(&dentry->d_lru, &tmp); | 940 | list_move(&dentry->d_lru, &tmp); |
915 | dentry->d_flags |= DCACHE_SHRINK_LIST; | 941 | dentry->d_flags |= DCACHE_SHRINK_LIST; |
942 | this_cpu_dec(nr_dentry_unused); | ||
943 | sb->s_nr_dentry_unused--; | ||
916 | spin_unlock(&dentry->d_lock); | 944 | spin_unlock(&dentry->d_lock); |
917 | if (!--count) | 945 | if (!--count) |
918 | break; | 946 | break; |
@@ -926,6 +954,27 @@ relock: | |||
926 | shrink_dentry_list(&tmp); | 954 | shrink_dentry_list(&tmp); |
927 | } | 955 | } |
928 | 956 | ||
957 | /* | ||
958 | * Mark all the dentries as on being the dispose list so we don't think they are | ||
959 | * still on the LRU if we try to kill them from ascending the parent chain in | ||
960 | * try_prune_one_dentry() rather than directly from the dispose list. | ||
961 | */ | ||
962 | static void | ||
963 | shrink_dcache_list( | ||
964 | struct list_head *dispose) | ||
965 | { | ||
966 | struct dentry *dentry; | ||
967 | |||
968 | rcu_read_lock(); | ||
969 | list_for_each_entry_rcu(dentry, dispose, d_lru) { | ||
970 | spin_lock(&dentry->d_lock); | ||
971 | dentry->d_flags |= DCACHE_SHRINK_LIST; | ||
972 | spin_unlock(&dentry->d_lock); | ||
973 | } | ||
974 | rcu_read_unlock(); | ||
975 | shrink_dentry_list(dispose); | ||
976 | } | ||
977 | |||
929 | /** | 978 | /** |
930 | * shrink_dcache_sb - shrink dcache for a superblock | 979 | * shrink_dcache_sb - shrink dcache for a superblock |
931 | * @sb: superblock | 980 | * @sb: superblock |
@@ -939,9 +988,17 @@ void shrink_dcache_sb(struct super_block *sb) | |||
939 | 988 | ||
940 | spin_lock(&sb->s_dentry_lru_lock); | 989 | spin_lock(&sb->s_dentry_lru_lock); |
941 | while (!list_empty(&sb->s_dentry_lru)) { | 990 | while (!list_empty(&sb->s_dentry_lru)) { |
991 | /* | ||
992 | * account for removal here so we don't need to handle it later | ||
993 | * even though the dentry is no longer on the lru list. | ||
994 | */ | ||
942 | list_splice_init(&sb->s_dentry_lru, &tmp); | 995 | list_splice_init(&sb->s_dentry_lru, &tmp); |
996 | this_cpu_sub(nr_dentry_unused, sb->s_nr_dentry_unused); | ||
997 | sb->s_nr_dentry_unused = 0; | ||
943 | spin_unlock(&sb->s_dentry_lru_lock); | 998 | spin_unlock(&sb->s_dentry_lru_lock); |
944 | shrink_dentry_list(&tmp); | 999 | |
1000 | shrink_dcache_list(&tmp); | ||
1001 | |||
945 | spin_lock(&sb->s_dentry_lru_lock); | 1002 | spin_lock(&sb->s_dentry_lru_lock); |
946 | } | 1003 | } |
947 | spin_unlock(&sb->s_dentry_lru_lock); | 1004 | spin_unlock(&sb->s_dentry_lru_lock); |