author	Dave Chinner <dchinner@redhat.com>	2011-07-08 00:14:42 -0400
committer	Al Viro <viro@zeniv.linux.org.uk>	2011-07-20 20:47:10 -0400
commit	b0d40c92adafde7c2d81203ce7c1c69275f41140 (patch)
tree	f75a19dcd1a37aff23dc43323b58f014b1297c6b /fs/dcache.c
parent	12ad3ab66103e6582ca69c0c9de18b13487eaaef (diff)
superblock: introduce per-sb cache shrinker infrastructure
With context based shrinkers, we can implement a per-superblock shrinker that shrinks the caches attached to the superblock. We currently have global shrinkers for the inode and dentry caches that split up into per-superblock operations via a coarse proportioning method that does not batch very well. The global shrinkers also have a dependency - dentries pin inodes - so we have to be very careful about how we register the global shrinkers so that the implicit call order is always correct.

With a per-sb shrinker callout, we can encode this dependency directly into the per-sb shrinker, hence avoiding the need for strictly ordering shrinker registrations. We also have no need for any proportioning code, as the shrinker subsystem already provides this functionality across all shrinkers.

Allowing the shrinker to operate on a single superblock at a time means fewer superblock list traversals and less locking, and reclaim should batch more effectively. This should result in less CPU overhead for reclaim and potentially faster reclaim of items from each filesystem.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
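The ordering argument above is worth making concrete. Below is a minimal sketch of the per-sb callout, assuming the fs/super.c side of this commit (prune_super, s_shrink, s_nr_inodes_unused, prune_icache_sb), which is not part of this fs/dcache.c-limited diff; the body is illustrative, not the exact patch text:

/*
 * Illustrative sketch only. A single per-sb callback prunes dentries
 * before inodes, so the "dentries pin inodes" dependency is explicit
 * rather than implied by global shrinker registration order.
 */
static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
{
	struct super_block *sb =
		container_of(shrink, struct super_block, s_shrink);
	int total, dentries;

	/* A GFP_NOFS caller must not reenter the filesystem. */
	if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
		return -1;

	total = sb->s_nr_dentry_unused + sb->s_nr_inodes_unused + 1;
	if (sc->nr_to_scan) {
		/* Proportion the scan between the dcache and the icache. */
		dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) / total;

		/* Dentries pin inodes: prune dentries first, inodes second. */
		prune_dcache_sb(sb, dentries);
		prune_icache_sb(sb, sc->nr_to_scan - dentries);
	}
	return (total / 100) * sysctl_vfs_cache_pressure;
}

Superblock lifetime handling (taking a passive reference so the sb cannot be unmounted mid-scan) is omitted here for brevity.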
Diffstat (limited to 'fs/dcache.c')
-rw-r--r--	fs/dcache.c	121
1 file changed, 12 insertions(+), 109 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 41e2085d430b..2762804a140d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -743,13 +743,11 @@ static void shrink_dentry_list(struct list_head *list)
  *
  * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
  */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
 {
-	/* called from prune_dcache() and shrink_dcache_parent() */
 	struct dentry *dentry;
 	LIST_HEAD(referenced);
 	LIST_HEAD(tmp);
-	int cnt = *count;
 
 relock:
 	spin_lock(&dcache_lru_lock);
@@ -777,7 +775,7 @@ relock:
 		} else {
 			list_move_tail(&dentry->d_lru, &tmp);
 			spin_unlock(&dentry->d_lock);
-			if (!--cnt)
+			if (!--count)
 				break;
 		}
 		cond_resched_lock(&dcache_lru_lock);
@@ -787,83 +785,22 @@ relock:
 	spin_unlock(&dcache_lru_lock);
 
 	shrink_dentry_list(&tmp);
-
-	*count = cnt;
 }
 
 /**
- * prune_dcache - shrink the dcache
- * @count: number of entries to try to free
+ * prune_dcache_sb - shrink the dcache
+ * @nr_to_scan: number of entries to try to free
  *
- * Shrink the dcache. This is done when we need more memory, or simply when we
- * need to unmount something (at which point we need to unuse all dentries).
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
+ * done when we need more memory an called from the superblock shrinker
+ * function.
  *
- * This function may fail to free any resources if all the dentries are in use.
+ * This function may fail to free any resources if all the dentries are in
+ * use.
  */
-static void prune_dcache(int count)
+void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
 {
-	struct super_block *sb, *p = NULL;
-	int w_count;
-	int unused = dentry_stat.nr_unused;
-	int prune_ratio;
-	int pruned;
-
-	if (unused == 0 || count == 0)
-		return;
-	if (count >= unused)
-		prune_ratio = 1;
-	else
-		prune_ratio = unused / count;
-	spin_lock(&sb_lock);
-	list_for_each_entry(sb, &super_blocks, s_list) {
-		if (list_empty(&sb->s_instances))
-			continue;
-		if (sb->s_nr_dentry_unused == 0)
-			continue;
-		sb->s_count++;
-		/* Now, we reclaim unused dentrins with fairness.
-		 * We reclaim them same percentage from each superblock.
-		 * We calculate number of dentries to scan on this sb
-		 * as follows, but the implementation is arranged to avoid
-		 * overflows:
-		 * number of dentries to scan on this sb =
-		 *	count * (number of dentries on this sb /
-		 *	number of dentries in the machine)
-		 */
-		spin_unlock(&sb_lock);
-		if (prune_ratio != 1)
-			w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
-		else
-			w_count = sb->s_nr_dentry_unused;
-		pruned = w_count;
-		/*
-		 * We need to be sure this filesystem isn't being unmounted,
-		 * otherwise we could race with generic_shutdown_super(), and
-		 * end up holding a reference to an inode while the filesystem
-		 * is unmounted. So we try to get s_umount, and make sure
-		 * s_root isn't NULL.
-		 */
-		if (down_read_trylock(&sb->s_umount)) {
-			if ((sb->s_root != NULL) &&
-			    (!list_empty(&sb->s_dentry_lru))) {
-				__shrink_dcache_sb(sb, &w_count,
-						DCACHE_REFERENCED);
-				pruned -= w_count;
-			}
-			up_read(&sb->s_umount);
-		}
-		spin_lock(&sb_lock);
-		if (p)
-			__put_super(p);
-		count -= pruned;
-		p = sb;
-		/* more work left to do? */
-		if (count <= 0)
-			break;
-	}
-	if (p)
-		__put_super(p);
-	spin_unlock(&sb_lock);
+	__shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
 }
 
 /**
@@ -1238,42 +1175,10 @@ void shrink_dcache_parent(struct dentry * parent)
 	int found;
 
 	while ((found = select_parent(parent)) != 0)
-		__shrink_dcache_sb(sb, &found, 0);
+		__shrink_dcache_sb(sb, found, 0);
 }
 EXPORT_SYMBOL(shrink_dcache_parent);
 
-/*
- * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
- *
- * We need to avoid reentering the filesystem if the caller is performing a
- * GFP_NOFS allocation attempt. One example deadlock is:
- *
- * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
- * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
- * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
- *
- * In this case we return -1 to tell the caller that we baled.
- */
-static int shrink_dcache_memory(struct shrinker *shrink,
-				struct shrink_control *sc)
-{
-	int nr = sc->nr_to_scan;
-	gfp_t gfp_mask = sc->gfp_mask;
-
-	if (nr) {
-		if (!(gfp_mask & __GFP_FS))
-			return -1;
-		prune_dcache(nr);
-	}
-
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
-}
-
-static struct shrinker dcache_shrinker = {
-	.shrink = shrink_dcache_memory,
-	.seeks = DEFAULT_SEEKS,
-};
-
 /**
  * __d_alloc - allocate a dcache entry
  * @sb: filesystem it will belong to
@@ -3083,8 +2988,6 @@ static void __init dcache_init(void)
 	 */
 	dentry_cache = KMEM_CACHE(dentry,
 		SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
-
-	register_shrinker(&dcache_shrinker);
 
 	/* Hash may have been set up in dcache_init_early */
 	if (!hashdist)
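Since register_shrinker(&dcache_shrinker) disappears from dcache_init() above, the registration has to live somewhere: in this commit that is the superblock lifecycle in fs/super.c, outside this diff. The helpers below are a hedged sketch of that shape; sb_shrinker_setup and sb_shrinker_teardown are hypothetical names, and prune_super/s_shrink are the assumed fs/super.c-side names from the sketch earlier.

/*
 * Sketch only: per-sb shrinker wiring, assumed fs/super.c side. The
 * shrinker lives in the superblock, is registered once the sb is set up,
 * and is unregistered before the sb is destroyed.
 */
static void sb_shrinker_setup(struct super_block *s)
{
	s->s_shrink.seeks = DEFAULT_SEEKS;
	s->s_shrink.shrink = prune_super;	/* per-sb callback */
	register_shrinker(&s->s_shrink);
}

static void sb_shrinker_teardown(struct super_block *s)
{
	unregister_shrinker(&s->s_shrink);
}

Tying the shrinker's lifetime to the superblock is what lets reclaim drop the old "is this sb being unmounted?" dance that prune_dcache() performed with s_umount and __put_super().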