summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAl Viro <viro@zeniv.linux.org.uk>2019-06-29 18:31:24 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2019-07-10 07:32:22 -0400
commit9bdebc2bd1c4abfbf44dc154cc152ec333e004de (patch)
tree9438b3e202e8adbb374ca2bbb607b3ca37026dbe
parente4e59906cfe4e215a4360962bedb3d6e5cca5158 (diff)
Teach shrink_dcache_parent() to cope with mixed-filesystem shrink lists
Currently, running into a shrink list that contains dentries from different filesystems can cause several unpleasant things for shrink_dcache_parent() and for umount(2). The first problem is that there's a window during shrink_dentry_list() between __dentry_kill() taking a victim out and the reference to its parent being dropped. During that window the parent looks like a genuine busy dentry. shrink_dcache_parent() (or, worse yet, shrink_dcache_for_umount()) coming at that time will see no eviction candidates and no indication that it needs to wait for some shrink_dentry_list() to proceed further. That applies to any shrink list that might intersect with the subtree we are trying to shrink; the only reason it does not blow up on umount(2) in the mainline is that we unregister the memory shrinker before hitting shrink_dcache_for_umount(). Another problem happens if something in a mixed-filesystem shrink list gets stuck in e.g. iput(), getting umount of unrelated fs to spin waiting for the stuck shrinker to get around to our dentries. Solution: 1) have shrink_dentry_list() decrement the parent's refcount and make sure it's on a shrink list (ours unless it already had been on some other) before calling __dentry_kill(). That eliminates the window when shrink_dcache_parent() would've blown past the entire subtree without noticing anything with zero refcount not on shrink lists. 2) when shrink_dcache_parent() has found no eviction candidates, but some dentries are still sitting on shrink lists, rather than repeating the scan in hope that shrinkers have progressed, scan looking for something on shrink lists with zero refcount. If such a thing is found, grab rcu_read_lock() and stop the scan, with caller locking it for eviction, dropping out of RCU and doing __dentry_kill(), with the same treatment for parent as shrink_dentry_list() would do. Note that right now mixed-filesystem shrink lists do not occur, so this is not a mainline bug. 
However, there's a bunch of uses for such beasts (e.g. the "try and evict everything we can out of given page" patches; there are potential uses in mount-related code, considerably simplifying the life in fs/namespace.c, etc.) Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r--fs/dcache.c100
-rw-r--r--fs/internal.h2
2 files changed, 85 insertions, 17 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index c435398f2c81..01b8cae41a71 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -861,6 +861,32 @@ void dput(struct dentry *dentry)
861} 861}
862EXPORT_SYMBOL(dput); 862EXPORT_SYMBOL(dput);
863 863
/*
 * __dput_to_list - drop one reference to @dentry, queueing it on @list
 * if that was the last one.
 * @dentry: dentry whose d_lockref.count is decremented; the caller must
 *          hold dentry->d_lock (see the __must_hold annotation).
 * @list:   shrink list that receives @dentry when its count reaches zero
 *          and it is not already on a shrink list.
 *
 * The count is always decremented.  The dentry is moved only when it is
 * not yet flagged DCACHE_SHRINK_LIST: it is first taken off the LRU (if
 * flagged DCACHE_LRU_LIST) and then added to @list when the count hits 0.
 */
864static void __dput_to_list(struct dentry *dentry, struct list_head *list)
865__must_hold(&dentry->d_lock)
866{
867 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
868 /* let the owner of the list it's on deal with it */
869 --dentry->d_lockref.count;
870 } else {
/* not on a shrink list: pull it off the LRU before (possibly) queueing it */
871 if (dentry->d_flags & DCACHE_LRU_LIST)
872 d_lru_del(dentry);
/* last reference gone: hand the dentry to the caller's shrink list */
873 if (!--dentry->d_lockref.count)
874 d_shrink_add(dentry, list);
875 }
876}
877
/*
 * dput_to_list - release a reference to @dentry, collecting it on @list
 * instead of killing it here.
 * @dentry: dentry to release.
 * @list:   shrink list passed on to __dput_to_list().
 *
 * First tries fast_dput() under rcu_read_lock(); if that succeeds there
 * is nothing more to do.  Otherwise, unless retain_dentry() says the
 * dentry should be kept, the reference is dropped via __dput_to_list().
 *
 * NOTE(review): the trailing spin_unlock() implies fast_dput() returns
 * false with dentry->d_lock held - confirm against fast_dput().
 */
878void dput_to_list(struct dentry *dentry, struct list_head *list)
879{
880 rcu_read_lock();
881 if (likely(fast_dput(dentry))) {
882 rcu_read_unlock();
883 return;
884 }
885 rcu_read_unlock();
886 if (!retain_dentry(dentry))
887 __dput_to_list(dentry, list);
888 spin_unlock(&dentry->d_lock);
889}
864 890
865/* This must be called with d_lock held */ 891/* This must be called with d_lock held */
866static inline void __dget_dlock(struct dentry *dentry) 892static inline void __dget_dlock(struct dentry *dentry)
@@ -1067,7 +1093,7 @@ out:
1067 return false; 1093 return false;
1068} 1094}
1069 1095
1070static void shrink_dentry_list(struct list_head *list) 1096void shrink_dentry_list(struct list_head *list)
1071{ 1097{
1072 while (!list_empty(list)) { 1098 while (!list_empty(list)) {
1073 struct dentry *dentry, *parent; 1099 struct dentry *dentry, *parent;
@@ -1089,18 +1115,9 @@ static void shrink_dentry_list(struct list_head *list)
1089 rcu_read_unlock(); 1115 rcu_read_unlock();
1090 d_shrink_del(dentry); 1116 d_shrink_del(dentry);
1091 parent = dentry->d_parent; 1117 parent = dentry->d_parent;
1118 if (parent != dentry)
1119 __dput_to_list(parent, list);
1092 __dentry_kill(dentry); 1120 __dentry_kill(dentry);
1093 if (parent == dentry)
1094 continue;
1095 /*
1096 * We need to prune ancestors too. This is necessary to prevent
1097 * quadratic behavior of shrink_dcache_parent(), but is also
1098 * expected to be beneficial in reducing dentry cache
1099 * fragmentation.
1100 */
1101 dentry = parent;
1102 while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
1103 dentry = dentry_kill(dentry);
1104 } 1121 }
1105} 1122}
1106 1123
@@ -1445,8 +1462,11 @@ out:
1445 1462
1446struct select_data { 1463struct select_data {
1447 struct dentry *start; 1464 struct dentry *start;
1465 union {
1466 long found;
1467 struct dentry *victim;
1468 };
1448 struct list_head dispose; 1469 struct list_head dispose;
1449 int found;
1450}; 1470};
1451 1471
1452static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) 1472static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
@@ -1478,6 +1498,37 @@ out:
1478 return ret; 1498 return ret;
1479} 1499}
1480 1500
/*
 * select_collect2 - d_walk() callback used by shrink_dcache_parent() for
 * its second scan.
 * @_data:  the struct select_data of the current scan.
 * @dentry: dentry being visited.
 *
 * The starting dentry (data->start) is skipped.  For any other dentry:
 *  - if it is on some shrink list and has a zero refcount, it is stored
 *    in data->victim and the walk is stopped with D_WALK_QUIT.  Note that
 *    rcu_read_lock() is taken here and is NOT released on this path; the
 *    caller is expected to drop it (shrink_dcache_parent() calls
 *    rcu_read_unlock() after inspecting data->victim).
 *  - if it is not on a shrink list, it is removed from the LRU (when
 *    flagged DCACHE_LRU_LIST) and, when its refcount is zero, added to
 *    data->dispose.
 *
 * Returns D_WALK_CONTINUE while nothing has been collected; once
 * data->dispose is non-empty, returns D_WALK_QUIT if need_resched() is
 * set and D_WALK_NORETRY otherwise.
 */
1501static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry)
1502{
1503 struct select_data *data = _data;
1504 enum d_walk_ret ret = D_WALK_CONTINUE;
1505
1506 if (data->start == dentry)
1507 goto out;
1508
1509 if (dentry->d_flags & DCACHE_SHRINK_LIST) {
1510 if (!dentry->d_lockref.count) {
/* returns with rcu_read_lock() still held; the caller must release it */
1511 rcu_read_lock();
1512 data->victim = dentry;
1513 return D_WALK_QUIT;
1514 }
1515 } else {
1516 if (dentry->d_flags & DCACHE_LRU_LIST)
1517 d_lru_del(dentry);
1518 if (!dentry->d_lockref.count)
1519 d_shrink_add(dentry, &data->dispose);
1520 }
1521 /*
1522 * We can return to the caller if we have found some (this
1523 * ensures forward progress). We'll be coming back to find
1524 * the rest.
1525 */
1526 if (!list_empty(&data->dispose))
1527 ret = need_resched() ? D_WALK_QUIT : D_WALK_NORETRY;
1528out:
1529 return ret;
1530}
1531
1481/** 1532/**
1482 * shrink_dcache_parent - prune dcache 1533 * shrink_dcache_parent - prune dcache
1483 * @parent: parent of entries to prune 1534 * @parent: parent of entries to prune
@@ -1487,12 +1538,9 @@ out:
1487void shrink_dcache_parent(struct dentry *parent) 1538void shrink_dcache_parent(struct dentry *parent)
1488{ 1539{
1489 for (;;) { 1540 for (;;) {
1490 struct select_data data; 1541 struct select_data data = {.start = parent};
1491 1542
1492 INIT_LIST_HEAD(&data.dispose); 1543 INIT_LIST_HEAD(&data.dispose);
1493 data.start = parent;
1494 data.found = 0;
1495
1496 d_walk(parent, &data, select_collect); 1544 d_walk(parent, &data, select_collect);
1497 1545
1498 if (!list_empty(&data.dispose)) { 1546 if (!list_empty(&data.dispose)) {
@@ -1503,6 +1551,24 @@ void shrink_dcache_parent(struct dentry *parent)
1503 cond_resched(); 1551 cond_resched();
1504 if (!data.found) 1552 if (!data.found)
1505 break; 1553 break;
1554 data.victim = NULL;
1555 d_walk(parent, &data, select_collect2);
1556 if (data.victim) {
1557 struct dentry *parent;
1558 spin_lock(&data.victim->d_lock);
1559 if (!shrink_lock_dentry(data.victim)) {
1560 spin_unlock(&data.victim->d_lock);
1561 rcu_read_unlock();
1562 } else {
1563 rcu_read_unlock();
1564 parent = data.victim->d_parent;
1565 if (parent != data.victim)
1566 __dput_to_list(parent, &data.dispose);
1567 __dentry_kill(data.victim);
1568 }
1569 }
1570 if (!list_empty(&data.dispose))
1571 shrink_dentry_list(&data.dispose);
1506 } 1572 }
1507} 1573}
1508EXPORT_SYMBOL(shrink_dcache_parent); 1574EXPORT_SYMBOL(shrink_dcache_parent);
diff --git a/fs/internal.h b/fs/internal.h
index a48ef81be37d..dc317abe31b5 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -156,6 +156,8 @@ extern int d_set_mounted(struct dentry *dentry);
156extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); 156extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
157extern struct dentry *d_alloc_cursor(struct dentry *); 157extern struct dentry *d_alloc_cursor(struct dentry *);
158extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); 158extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
159extern void dput_to_list(struct dentry *, struct list_head *);
160extern void shrink_dentry_list(struct list_head *);
159 161
160/* 162/*
161 * read_write.c 163 * read_write.c