author:    Minchan Kim <minchan@kernel.org>  2014-04-07 18:38:27 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>  2014-04-07 19:36:03 -0400
commit:    60105e1248f571aa3b895cd63bef072ed9d90c77
tree:      9e0d526a18bfcfeed78905f2c6ca9b52792c2803 /mm/zswap.c
parent:    6335b19344cc263724ae49a76ed930b21a659055
mm/zswap: support multiple swap devices
Cai Liu reported that zbud pool page counting has a problem when multiple swap devices are in use: only one swap device is counted instead of all of them, so zswap cannot control writeback properly. The result is unnecessary writeback, or no writeback when we should really write back. In other words, it made zswap behave erratically.

There is another problem in zswap. For example, assume we use two swap devices, A and B, with different priorities. A charged 19% of the pool a long time ago and is now full, so the VM starts using B, which has recently charged 1%. That means zswap's charge (19% + 1%) hits the default 20% limit. If the VM then wants to swap out more pages to B, zbud_reclaim_page() evicts one of the pages in B's pool, and this repeats continuously. It is a complete LRU-inversion problem, and swap thrashing on B would happen.

This patch makes zswap handle multiple swap devices by creating *a* single zbud pool that is shared by all of them, so all zswap pages across every swap device stay in LRU order, which prevents both problems above.

Signed-off-by: Minchan Kim <minchan@kernel.org>
Reported-by: Cai Liu <cai.liu@samsung.com>
Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com>
Cc: Seth Jennings <sjennings@variantweb.net>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
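For context: the limit check that the broken counting feeds, and whose firing triggers the zbud_reclaim_page() call visible in the diff below, is zswap_is_full(). A paraphrased sketch of it as it stands in the mm/zswap.c of this era (the identifiers are real; the exact body may differ slightly):

static bool zswap_is_full(void)
{
        /* budget is max_pool_percent (default 20) of total RAM, in pages */
        return totalram_pages * zswap_max_pool_percent / 100 <
                        zswap_pool_pages;
}

With one zbud pool per swap device, zswap_pool_pages was overwritten with the size of whichever device's pool was touched last, so this comparison never saw the true total; and when it did fire, reclaim ran against the current device's young pages instead of the globally oldest ones.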
Diffstat (limited to 'mm/zswap.c')
-rw-r--r--  mm/zswap.c  64
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/mm/zswap.c b/mm/zswap.c
index 25312eb373a0..c0c9b7c80c05 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -89,6 +89,9 @@ static unsigned int zswap_max_pool_percent = 20;
 module_param_named(max_pool_percent,
                        zswap_max_pool_percent, uint, 0644);
 
+/* zbud_pool is shared by all of zswap backend */
+static struct zbud_pool *zswap_pool;
+
 /*********************************
 * compression functions
 **********************************/
@@ -189,7 +192,6 @@ struct zswap_header {
 struct zswap_tree {
        struct rb_root rbroot;
        spinlock_t lock;
-       struct zbud_pool *pool;
 };
 
 static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -285,13 +287,12 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
  * Carries out the common pattern of freeing and entry's zbud allocation,
  * freeing the entry itself, and decrementing the number of stored pages.
  */
-static void zswap_free_entry(struct zswap_tree *tree,
-                       struct zswap_entry *entry)
+static void zswap_free_entry(struct zswap_entry *entry)
 {
-       zbud_free(tree->pool, entry->handle);
+       zbud_free(zswap_pool, entry->handle);
        zswap_entry_cache_free(entry);
        atomic_dec(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(tree->pool);
+       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
 }
 
 /* caller must hold the tree lock */
@@ -311,7 +312,7 @@ static void zswap_entry_put(struct zswap_tree *tree,
        BUG_ON(refcount < 0);
        if (refcount == 0) {
                zswap_rb_erase(&tree->rbroot, entry);
-               zswap_free_entry(tree, entry);
+               zswap_free_entry(entry);
        }
 }
 
@@ -545,7 +546,6 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        zbud_unmap(pool, handle);
        tree = zswap_trees[swp_type(swpentry)];
        offset = swp_offset(swpentry);
-       BUG_ON(pool != tree->pool);
 
        /* find and ref zswap entry */
        spin_lock(&tree->lock);
@@ -573,13 +573,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
        case ZSWAP_SWAPCACHE_NEW: /* page is locked */
                /* decompress */
                dlen = PAGE_SIZE;
-               src = (u8 *)zbud_map(tree->pool, entry->handle) +
+               src = (u8 *)zbud_map(zswap_pool, entry->handle) +
                        sizeof(struct zswap_header);
                dst = kmap_atomic(page);
                ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
                                entry->length, dst, &dlen);
                kunmap_atomic(dst);
-               zbud_unmap(tree->pool, entry->handle);
+               zbud_unmap(zswap_pool, entry->handle);
                BUG_ON(ret);
                BUG_ON(dlen != PAGE_SIZE);
 
@@ -652,7 +652,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
        /* reclaim space if needed */
        if (zswap_is_full()) {
                zswap_pool_limit_hit++;
-               if (zbud_reclaim_page(tree->pool, 8)) {
+               if (zbud_reclaim_page(zswap_pool, 8)) {
                        zswap_reject_reclaim_fail++;
                        ret = -ENOMEM;
                        goto reject;
@@ -679,7 +679,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* store */
        len = dlen + sizeof(struct zswap_header);
-       ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN,
+       ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
                        &handle);
        if (ret == -ENOSPC) {
                zswap_reject_compress_poor++;
@@ -689,11 +689,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
                zswap_reject_alloc_fail++;
                goto freepage;
        }
-       zhdr = zbud_map(tree->pool, handle);
+       zhdr = zbud_map(zswap_pool, handle);
        zhdr->swpentry = swp_entry(type, offset);
        buf = (u8 *)(zhdr + 1);
        memcpy(buf, dst, dlen);
-       zbud_unmap(tree->pool, handle);
+       zbud_unmap(zswap_pool, handle);
        put_cpu_var(zswap_dstmem);
 
        /* populate entry */
@@ -716,7 +716,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
 
        /* update stats */
        atomic_inc(&zswap_stored_pages);
-       zswap_pool_pages = zbud_get_pool_size(tree->pool);
+       zswap_pool_pages = zbud_get_pool_size(zswap_pool);
 
        return 0;
 
@@ -752,13 +752,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 
        /* decompress */
        dlen = PAGE_SIZE;
-       src = (u8 *)zbud_map(tree->pool, entry->handle) +
+       src = (u8 *)zbud_map(zswap_pool, entry->handle) +
                        sizeof(struct zswap_header);
        dst = kmap_atomic(page);
        ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
                dst, &dlen);
        kunmap_atomic(dst);
-       zbud_unmap(tree->pool, entry->handle);
+       zbud_unmap(zswap_pool, entry->handle);
        BUG_ON(ret);
 
        spin_lock(&tree->lock);
@@ -804,11 +804,9 @@ static void zswap_frontswap_invalidate_area(unsigned type)
        /* walk the tree and free everything */
        spin_lock(&tree->lock);
        rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
-               zswap_free_entry(tree, entry);
+               zswap_free_entry(entry);
        tree->rbroot = RB_ROOT;
        spin_unlock(&tree->lock);
-
-       zbud_destroy_pool(tree->pool);
        kfree(tree);
        zswap_trees[type] = NULL;
 }
@@ -822,20 +820,14 @@ static void zswap_frontswap_init(unsigned type)
        struct zswap_tree *tree;
 
        tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
-       if (!tree)
-               goto err;
-       tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
-       if (!tree->pool)
-               goto freetree;
+       if (!tree) {
+               pr_err("alloc failed, zswap disabled for swap type %d\n", type);
+               return;
+       }
+
        tree->rbroot = RB_ROOT;
        spin_lock_init(&tree->lock);
        zswap_trees[type] = tree;
-       return;
-
-freetree:
-       kfree(tree);
-err:
-       pr_err("alloc failed, zswap disabled for swap type %d\n", type);
 }
 
 static struct frontswap_ops zswap_frontswap_ops = {
@@ -907,9 +899,16 @@ static int __init init_zswap(void)
                return 0;
 
        pr_info("loading zswap\n");
+
+       zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
+       if (!zswap_pool) {
+               pr_err("zbud pool creation failed\n");
+               goto error;
+       }
+
        if (zswap_entry_cache_create()) {
                pr_err("entry cache creation failed\n");
-               goto error;
+               goto cachefail;
        }
        if (zswap_comp_init()) {
                pr_err("compressor initialization failed\n");
@@ -919,6 +918,7 @@ static int __init init_zswap(void)
                pr_err("per-cpu initialization failed\n");
                goto pcpufail;
        }
+
        frontswap_register_ops(&zswap_frontswap_ops);
        if (zswap_debugfs_init())
                pr_warn("debugfs initialization failed\n");
@@ -927,6 +927,8 @@ pcpufail:
        zswap_comp_exit();
 compfail:
        zswap_entry_cache_destory();
+cachefail:
+       zbud_destroy_pool(zswap_pool);
 error:
        return -ENOMEM;
 }
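A closing note on the writeback path seen in the zswap_writeback_entry() hunks: that function takes the pool and a handle because it is registered as zbud's evict callback, so with a single shared pool, zbud_reclaim_page() now evicts handles in one global LRU order across all swap devices. The wiring, as it appears in the mm/zswap.c of this period (unchanged by this patch, shown for context):

static struct zbud_ops zswap_zbud_ops = {
        /* zbud invokes this on the LRU-oldest handle during reclaim */
        .evict = zswap_writeback_entry
};

This is also why the BUG_ON(pool != tree->pool) assertion could be dropped: there is only one pool left for the callback to receive.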