aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMinchan Kim <minchan@kernel.org>2014-04-07 18:38:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-04-07 19:36:03 -0400
commit60105e1248f571aa3b895cd63bef072ed9d90c77 (patch)
tree9e0d526a18bfcfeed78905f2c6ca9b52792c2803 /mm
parent6335b19344cc263724ae49a76ed930b21a659055 (diff)
mm/zswap: support multiple swap devices
Cai Liu reported that zbud pool page counting currently has a problem when multiple swap devices are used: it counts only one swap device instead of all of them, so zswap cannot control writeback properly. The result is unnecessary writeback, or no writeback when we really should write back. IOW, it made zswap crazy. Another problem in zswap is the following. For example, let's assume we use two swap devices A and B with different priorities, that A was already charged 19% a long time ago, and that A is now full, so the VM has started to use B, which has recently been charged 1%. It means zswap's charge (19% + 1%) is full under the default limit. Then, if the VM wants to swap out more pages into B, zbud_reclaim_page would evict one of the pages in B's pool, and this would be repeated continuously. It's a total LRU inversion problem, and swap thrashing in B would happen. This patch makes zswap handle multiple swap devices by creating *a* zbud pool which is shared by all of them, so all zswap pages across the swap devices are kept in LRU order, which prevents the above two problems. Signed-off-by: Minchan Kim <minchan@kernel.org> Reported-by: Cai Liu <cai.liu@samsung.com> Suggested-by: Weijie Yang <weijie.yang.kh@gmail.com> Cc: Seth Jennings <sjennings@variantweb.net> Reviewed-by: Bob Liu <bob.liu@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/zswap.c64
1 files changed, 33 insertions, 31 deletions
diff --git a/mm/zswap.c b/mm/zswap.c
index 25312eb373a0..c0c9b7c80c05 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -89,6 +89,9 @@ static unsigned int zswap_max_pool_percent = 20;
89module_param_named(max_pool_percent, 89module_param_named(max_pool_percent,
90 zswap_max_pool_percent, uint, 0644); 90 zswap_max_pool_percent, uint, 0644);
91 91
92/* zbud_pool is shared by all of zswap backend */
93static struct zbud_pool *zswap_pool;
94
92/********************************* 95/*********************************
93* compression functions 96* compression functions
94**********************************/ 97**********************************/
@@ -189,7 +192,6 @@ struct zswap_header {
189struct zswap_tree { 192struct zswap_tree {
190 struct rb_root rbroot; 193 struct rb_root rbroot;
191 spinlock_t lock; 194 spinlock_t lock;
192 struct zbud_pool *pool;
193}; 195};
194 196
195static struct zswap_tree *zswap_trees[MAX_SWAPFILES]; 197static struct zswap_tree *zswap_trees[MAX_SWAPFILES];
@@ -285,13 +287,12 @@ static void zswap_rb_erase(struct rb_root *root, struct zswap_entry *entry)
285 * Carries out the common pattern of freeing and entry's zbud allocation, 287 * Carries out the common pattern of freeing and entry's zbud allocation,
286 * freeing the entry itself, and decrementing the number of stored pages. 288 * freeing the entry itself, and decrementing the number of stored pages.
287 */ 289 */
288static void zswap_free_entry(struct zswap_tree *tree, 290static void zswap_free_entry(struct zswap_entry *entry)
289 struct zswap_entry *entry)
290{ 291{
291 zbud_free(tree->pool, entry->handle); 292 zbud_free(zswap_pool, entry->handle);
292 zswap_entry_cache_free(entry); 293 zswap_entry_cache_free(entry);
293 atomic_dec(&zswap_stored_pages); 294 atomic_dec(&zswap_stored_pages);
294 zswap_pool_pages = zbud_get_pool_size(tree->pool); 295 zswap_pool_pages = zbud_get_pool_size(zswap_pool);
295} 296}
296 297
297/* caller must hold the tree lock */ 298/* caller must hold the tree lock */
@@ -311,7 +312,7 @@ static void zswap_entry_put(struct zswap_tree *tree,
311 BUG_ON(refcount < 0); 312 BUG_ON(refcount < 0);
312 if (refcount == 0) { 313 if (refcount == 0) {
313 zswap_rb_erase(&tree->rbroot, entry); 314 zswap_rb_erase(&tree->rbroot, entry);
314 zswap_free_entry(tree, entry); 315 zswap_free_entry(entry);
315 } 316 }
316} 317}
317 318
@@ -545,7 +546,6 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
545 zbud_unmap(pool, handle); 546 zbud_unmap(pool, handle);
546 tree = zswap_trees[swp_type(swpentry)]; 547 tree = zswap_trees[swp_type(swpentry)];
547 offset = swp_offset(swpentry); 548 offset = swp_offset(swpentry);
548 BUG_ON(pool != tree->pool);
549 549
550 /* find and ref zswap entry */ 550 /* find and ref zswap entry */
551 spin_lock(&tree->lock); 551 spin_lock(&tree->lock);
@@ -573,13 +573,13 @@ static int zswap_writeback_entry(struct zbud_pool *pool, unsigned long handle)
573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */ 573 case ZSWAP_SWAPCACHE_NEW: /* page is locked */
574 /* decompress */ 574 /* decompress */
575 dlen = PAGE_SIZE; 575 dlen = PAGE_SIZE;
576 src = (u8 *)zbud_map(tree->pool, entry->handle) + 576 src = (u8 *)zbud_map(zswap_pool, entry->handle) +
577 sizeof(struct zswap_header); 577 sizeof(struct zswap_header);
578 dst = kmap_atomic(page); 578 dst = kmap_atomic(page);
579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, 579 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src,
580 entry->length, dst, &dlen); 580 entry->length, dst, &dlen);
581 kunmap_atomic(dst); 581 kunmap_atomic(dst);
582 zbud_unmap(tree->pool, entry->handle); 582 zbud_unmap(zswap_pool, entry->handle);
583 BUG_ON(ret); 583 BUG_ON(ret);
584 BUG_ON(dlen != PAGE_SIZE); 584 BUG_ON(dlen != PAGE_SIZE);
585 585
@@ -652,7 +652,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
652 /* reclaim space if needed */ 652 /* reclaim space if needed */
653 if (zswap_is_full()) { 653 if (zswap_is_full()) {
654 zswap_pool_limit_hit++; 654 zswap_pool_limit_hit++;
655 if (zbud_reclaim_page(tree->pool, 8)) { 655 if (zbud_reclaim_page(zswap_pool, 8)) {
656 zswap_reject_reclaim_fail++; 656 zswap_reject_reclaim_fail++;
657 ret = -ENOMEM; 657 ret = -ENOMEM;
658 goto reject; 658 goto reject;
@@ -679,7 +679,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
679 679
680 /* store */ 680 /* store */
681 len = dlen + sizeof(struct zswap_header); 681 len = dlen + sizeof(struct zswap_header);
682 ret = zbud_alloc(tree->pool, len, __GFP_NORETRY | __GFP_NOWARN, 682 ret = zbud_alloc(zswap_pool, len, __GFP_NORETRY | __GFP_NOWARN,
683 &handle); 683 &handle);
684 if (ret == -ENOSPC) { 684 if (ret == -ENOSPC) {
685 zswap_reject_compress_poor++; 685 zswap_reject_compress_poor++;
@@ -689,11 +689,11 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
689 zswap_reject_alloc_fail++; 689 zswap_reject_alloc_fail++;
690 goto freepage; 690 goto freepage;
691 } 691 }
692 zhdr = zbud_map(tree->pool, handle); 692 zhdr = zbud_map(zswap_pool, handle);
693 zhdr->swpentry = swp_entry(type, offset); 693 zhdr->swpentry = swp_entry(type, offset);
694 buf = (u8 *)(zhdr + 1); 694 buf = (u8 *)(zhdr + 1);
695 memcpy(buf, dst, dlen); 695 memcpy(buf, dst, dlen);
696 zbud_unmap(tree->pool, handle); 696 zbud_unmap(zswap_pool, handle);
697 put_cpu_var(zswap_dstmem); 697 put_cpu_var(zswap_dstmem);
698 698
699 /* populate entry */ 699 /* populate entry */
@@ -716,7 +716,7 @@ static int zswap_frontswap_store(unsigned type, pgoff_t offset,
716 716
717 /* update stats */ 717 /* update stats */
718 atomic_inc(&zswap_stored_pages); 718 atomic_inc(&zswap_stored_pages);
719 zswap_pool_pages = zbud_get_pool_size(tree->pool); 719 zswap_pool_pages = zbud_get_pool_size(zswap_pool);
720 720
721 return 0; 721 return 0;
722 722
@@ -752,13 +752,13 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
752 752
753 /* decompress */ 753 /* decompress */
754 dlen = PAGE_SIZE; 754 dlen = PAGE_SIZE;
755 src = (u8 *)zbud_map(tree->pool, entry->handle) + 755 src = (u8 *)zbud_map(zswap_pool, entry->handle) +
756 sizeof(struct zswap_header); 756 sizeof(struct zswap_header);
757 dst = kmap_atomic(page); 757 dst = kmap_atomic(page);
758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length, 758 ret = zswap_comp_op(ZSWAP_COMPOP_DECOMPRESS, src, entry->length,
759 dst, &dlen); 759 dst, &dlen);
760 kunmap_atomic(dst); 760 kunmap_atomic(dst);
761 zbud_unmap(tree->pool, entry->handle); 761 zbud_unmap(zswap_pool, entry->handle);
762 BUG_ON(ret); 762 BUG_ON(ret);
763 763
764 spin_lock(&tree->lock); 764 spin_lock(&tree->lock);
@@ -804,11 +804,9 @@ static void zswap_frontswap_invalidate_area(unsigned type)
804 /* walk the tree and free everything */ 804 /* walk the tree and free everything */
805 spin_lock(&tree->lock); 805 spin_lock(&tree->lock);
806 rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) 806 rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode)
807 zswap_free_entry(tree, entry); 807 zswap_free_entry(entry);
808 tree->rbroot = RB_ROOT; 808 tree->rbroot = RB_ROOT;
809 spin_unlock(&tree->lock); 809 spin_unlock(&tree->lock);
810
811 zbud_destroy_pool(tree->pool);
812 kfree(tree); 810 kfree(tree);
813 zswap_trees[type] = NULL; 811 zswap_trees[type] = NULL;
814} 812}
@@ -822,20 +820,14 @@ static void zswap_frontswap_init(unsigned type)
822 struct zswap_tree *tree; 820 struct zswap_tree *tree;
823 821
824 tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL); 822 tree = kzalloc(sizeof(struct zswap_tree), GFP_KERNEL);
825 if (!tree) 823 if (!tree) {
826 goto err; 824 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
827 tree->pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops); 825 return;
828 if (!tree->pool) 826 }
829 goto freetree; 827
830 tree->rbroot = RB_ROOT; 828 tree->rbroot = RB_ROOT;
831 spin_lock_init(&tree->lock); 829 spin_lock_init(&tree->lock);
832 zswap_trees[type] = tree; 830 zswap_trees[type] = tree;
833 return;
834
835freetree:
836 kfree(tree);
837err:
838 pr_err("alloc failed, zswap disabled for swap type %d\n", type);
839} 831}
840 832
841static struct frontswap_ops zswap_frontswap_ops = { 833static struct frontswap_ops zswap_frontswap_ops = {
@@ -907,9 +899,16 @@ static int __init init_zswap(void)
907 return 0; 899 return 0;
908 900
909 pr_info("loading zswap\n"); 901 pr_info("loading zswap\n");
902
903 zswap_pool = zbud_create_pool(GFP_KERNEL, &zswap_zbud_ops);
904 if (!zswap_pool) {
905 pr_err("zbud pool creation failed\n");
906 goto error;
907 }
908
910 if (zswap_entry_cache_create()) { 909 if (zswap_entry_cache_create()) {
911 pr_err("entry cache creation failed\n"); 910 pr_err("entry cache creation failed\n");
912 goto error; 911 goto cachefail;
913 } 912 }
914 if (zswap_comp_init()) { 913 if (zswap_comp_init()) {
915 pr_err("compressor initialization failed\n"); 914 pr_err("compressor initialization failed\n");
@@ -919,6 +918,7 @@ static int __init init_zswap(void)
919 pr_err("per-cpu initialization failed\n"); 918 pr_err("per-cpu initialization failed\n");
920 goto pcpufail; 919 goto pcpufail;
921 } 920 }
921
922 frontswap_register_ops(&zswap_frontswap_ops); 922 frontswap_register_ops(&zswap_frontswap_ops);
923 if (zswap_debugfs_init()) 923 if (zswap_debugfs_init())
924 pr_warn("debugfs initialization failed\n"); 924 pr_warn("debugfs initialization failed\n");
@@ -927,6 +927,8 @@ pcpufail:
927 zswap_comp_exit(); 927 zswap_comp_exit();
928compfail: 928compfail:
929 zswap_entry_cache_destory(); 929 zswap_entry_cache_destory();
930cachefail:
931 zbud_destroy_pool(zswap_pool);
930error: 932error:
931 return -ENOMEM; 933 return -ENOMEM;
932} 934}