author		Joonsoo Kim <iamjoonsoo.kim@lge.com>	2014-12-12 19:56:44 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-13 15:42:49 -0500
commit		9eec4cd53f9865b733dc78cf5f6465871beed014 (patch)
tree		d1d8df0008e605683c54595ebde476d89a94fcf2 /mm/zsmalloc.c
parent		70bc068c4f94e42b79de8f80d0aa560bc6daedec (diff)
zsmalloc: merge size_class to reduce fragmentation
zsmalloc has many size_classes to reduce fragmentation, spaced in 16-byte units: for example 16, 32, 48, etc., if PAGE_SIZE is 4096. zsmalloc also has the constraint that each zspage contains at most 4 pages.

Under these constraints, an interesting pattern appears. Consider the size_classes for 1488, 1472, ..., 1376. To prevent external fragmentation, they all use 4 pages per zspage, so each of them can hold at most 11 objects:

16384 (4096 * 4) = 1488 * 11 + remainder
16384 (4096 * 4) = 1472 * 11 + remainder
16384 (4096 * 4) = ...
16384 (4096 * 4) = 1376 * 11 + remainder

These classes share the same characteristics, so keeping them separate buys nothing. Because the 1488- and 1472-byte classes can each fit only 11 objects into 4 pages, and a 1472-byte object fits in a 1488-byte slot, merging these classes so that all of them use 1488-byte objects reduces the total number of size classes. Fewer size classes means less overall fragmentation, because a wider range of compressed pages fits into each remaining class, leaving fewer unused objects per class, and it also saves some memory.

To this end, this patch implements size_class merging. If a size_class would have the same pages_per_zspage and the same number of objects per zspage as the previous size_class, we don't create a new size_class; instead we reuse the previous one with the same characteristics. This way the example sizes above (1488, 1472, ..., 1376) share a single size_class, which yields much better memory utilization.

Below are the results of a simple test.

TEST ENV: EXT4 on zram, mounted with the discard option
WORKLOAD: untar the kernel source code, then remove directories in
descending order of size (drivers arch fs sound include net
Documentation firmware kernel tools).

Each line shows orig_data_size, compr_data_size, mem_used_total,
fragmentation overhead (mem_used - compr_data_size) and overhead ratio
(overhead / compr_data_size), respectively, after the untar and each
remove operation.

* untar-nomerge.out

orig_size compr_size used_size overhead overhead_ratio
525.88MB   199.16MB   210.23MB   11.08MB       5.56%
288.32MB    97.43MB   105.63MB    8.20MB       8.41%
177.32MB    61.12MB    69.40MB    8.28MB      13.55%
146.47MB    47.32MB    56.10MB    8.78MB      18.55%
124.16MB    38.85MB    48.41MB    9.55MB      24.58%
103.93MB    31.68MB    40.93MB    9.25MB      29.21%
 84.34MB    22.86MB    32.72MB    9.86MB      43.13%
 66.87MB    14.83MB    23.83MB    9.00MB      60.70%
 60.67MB    11.11MB    18.60MB    7.49MB      67.48%
 55.86MB     8.83MB    16.61MB    7.77MB      88.03%
 53.32MB     8.01MB    15.32MB    7.31MB      91.24%

* untar-merge.out

orig_size compr_size used_size overhead overhead_ratio
526.23MB   199.18MB   209.81MB   10.64MB       5.34%
288.68MB    97.45MB   104.08MB    6.63MB       6.80%
177.68MB    61.14MB    66.93MB    5.79MB       9.47%
146.83MB    47.34MB    52.79MB    5.45MB      11.51%
124.52MB    38.87MB    44.30MB    5.43MB      13.96%
104.29MB    31.70MB    36.83MB    5.13MB      16.19%
 84.70MB    22.88MB    27.92MB    5.04MB      22.04%
 67.11MB    14.83MB    19.26MB    4.43MB      29.86%
 60.82MB    11.10MB    14.90MB    3.79MB      34.17%
 55.90MB     8.82MB    12.61MB    3.79MB      42.97%
 53.32MB     8.01MB    11.73MB    3.73MB      46.53%

As the results show, the merged version has a better overhead ratio (5th column) and uses less memory (mem_used_total, 3rd column).
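For illustration only (not part of the patch): a minimal user-space sketch of the arithmetic above, reusing the formula from the patch's get_maxobj_per_zspage(). PAGE_SIZE = 4096 and 4 pages per zspage are assumed, as in the example sizes:

	#include <stdio.h>

	#define PAGE_SIZE 4096	/* assumed, as in the commit message */

	static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
	{
		return pages_per_zspage * PAGE_SIZE / size;
	}

	int main(void)
	{
		/* All classes from 1376 to 1488 bytes use 4 pages per zspage. */
		for (int size = 1376; size <= 1488; size += 16)
			printf("size %4d -> %u objects per zspage\n",
			       size, get_maxobj_per_zspage(size, 4));
		/* Every line prints 11, so one merged class (1488) suffices. */
		return 0;
	}

Since all eight classes in this range pack exactly 11 objects into 4 pages, folding them into the 1488-byte class costs at most 1488 - 1376 = 112 bytes of internal padding per object while eliminating seven size_classes.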
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Reviewed-by: Dan Streetman <ddstreet@ieee.org>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: <juno.choi@lge.com>
Cc: "seungho1.park" <seungho1.park@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/zsmalloc.c')
-rw-r--r--	mm/zsmalloc.c	80
1 file changed, 66 insertions, 14 deletions
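As background for the hunks below: the patch turns zs_pool's embedded array of size_class structs into an array of pointers, so merged indices alias one shared class, and only the slot whose stored index matches its position owns the allocation. A minimal user-space sketch of that sharing and ownership rule (the simplified struct and the 4-slot table are illustrative, not the kernel's):

	#include <stdio.h>
	#include <stdlib.h>

	/* Simplified stand-in for the kernel's struct size_class. */
	struct size_class { int size; int index; };

	int main(void)
	{
		/* Array of pointers, as in the patched zs_pool: merged
		 * entries simply alias one previously created class. */
		struct size_class *table[4];
		struct size_class *shared = malloc(sizeof(*shared));

		if (!shared)
			return 1;
		shared->size = 1488;	/* illustrative size from the commit message */
		shared->index = 3;	/* slot that owns the allocation */

		for (int i = 3; i >= 0; i--)
			table[i] = shared;	/* all four slots share one class */

		/* Teardown mirrors zs_destroy_pool's rule: only the slot whose
		 * stored index matches frees the class; aliases are skipped. */
		for (int i = 0; i < 4; i++) {
			if (table[i]->index != i)
				continue;
			printf("slot %d owns class of size %d\n", i, table[i]->size);
			free(table[i]);
		}
		return 0;
	}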
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 839a48c3ca27..b3b57ef85830 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -214,7 +214,7 @@ struct link_free {
 };
 
 struct zs_pool {
-	struct size_class size_class[ZS_SIZE_CLASSES];
+	struct size_class *size_class[ZS_SIZE_CLASSES];
 
 	gfp_t flags;	/* allocation flags used when growing pool */
 	atomic_long_t pages_allocated;
@@ -468,7 +468,7 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool,
 	if (newfg == currfg)
 		goto out;
 
-	class = &pool->size_class[class_idx];
+	class = pool->size_class[class_idx];
 	remove_zspage(page, class, currfg);
 	insert_zspage(page, class, newfg);
 	set_zspage_mapping(page, class_idx, newfg);
@@ -925,6 +925,23 @@ fail:
 	return notifier_to_errno(ret);
 }
 
+static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
+{
+	return pages_per_zspage * PAGE_SIZE / size;
+}
+
+static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
+{
+	if (prev->pages_per_zspage != pages_per_zspage)
+		return false;
+
+	if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)
+		!= get_maxobj_per_zspage(size, pages_per_zspage))
+		return false;
+
+	return true;
+}
+
 /**
  * zs_create_pool - Creates an allocation pool to work from.
  * @flags: allocation flags used to allocate pool metadata
@@ -945,25 +962,56 @@ struct zs_pool *zs_create_pool(gfp_t flags)
 	if (!pool)
 		return NULL;
 
-	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
+	/*
+	 * Iterate reversly, because, size of size_class that we want to use
+	 * for merging should be larger or equal to current size.
+	 */
+	for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
 		int size;
+		int pages_per_zspage;
 		struct size_class *class;
+		struct size_class *prev_class;
 
 		size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
 		if (size > ZS_MAX_ALLOC_SIZE)
 			size = ZS_MAX_ALLOC_SIZE;
+		pages_per_zspage = get_pages_per_zspage(size);
+
+		/*
+		 * size_class is used for normal zsmalloc operation such
+		 * as alloc/free for that size. Although it is natural that we
+		 * have one size_class for each size, there is a chance that we
+		 * can get more memory utilization if we use one size_class for
+		 * many different sizes whose size_class have same
+		 * characteristics. So, we makes size_class point to
+		 * previous size_class if possible.
+		 */
+		if (i < ZS_SIZE_CLASSES - 1) {
+			prev_class = pool->size_class[i + 1];
+			if (can_merge(prev_class, size, pages_per_zspage)) {
+				pool->size_class[i] = prev_class;
+				continue;
+			}
+		}
+
+		class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
+		if (!class)
+			goto err;
 
-		class = &pool->size_class[i];
 		class->size = size;
 		class->index = i;
+		class->pages_per_zspage = pages_per_zspage;
 		spin_lock_init(&class->lock);
-		class->pages_per_zspage = get_pages_per_zspage(size);
-
+		pool->size_class[i] = class;
 	}
 
 	pool->flags = flags;
 
 	return pool;
+
+err:
+	zs_destroy_pool(pool);
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(zs_create_pool);
 
969 1017
@@ -973,7 +1021,13 @@ void zs_destroy_pool(struct zs_pool *pool)
 
 	for (i = 0; i < ZS_SIZE_CLASSES; i++) {
 		int fg;
-		struct size_class *class = &pool->size_class[i];
+		struct size_class *class = pool->size_class[i];
+
+		if (!class)
+			continue;
+
+		if (class->index != i)
+			continue;
 
 		for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
 			if (class->fullness_list[fg]) {
@@ -981,6 +1035,7 @@ void zs_destroy_pool(struct zs_pool *pool)
 				class->size, fg);
 			}
 		}
+		kfree(class);
 	}
 	kfree(pool);
 }
@@ -999,7 +1054,6 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 {
 	unsigned long obj;
 	struct link_free *link;
-	int class_idx;
 	struct size_class *class;
 
 	struct page *first_page, *m_page;
@@ -1008,9 +1062,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
 		return 0;
 
-	class_idx = get_size_class_index(size);
-	class = &pool->size_class[class_idx];
-	BUG_ON(class_idx != class->index);
+	class = pool->size_class[get_size_class_index(size)];
 
 	spin_lock(&class->lock);
 	first_page = find_get_zspage(class);
@@ -1063,7 +1115,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
 	first_page = get_first_page(f_page);
 
 	get_zspage_mapping(first_page, &class_idx, &fullness);
-	class = &pool->size_class[class_idx];
+	class = pool->size_class[class_idx];
 	f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
 
 	spin_lock(&class->lock);
@@ -1124,7 +1176,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 
 	obj_handle_to_location(handle, &page, &obj_idx);
 	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = &pool->size_class[class_idx];
+	class = pool->size_class[class_idx];
 	off = obj_idx_to_offset(page, obj_idx, class->size);
 
 	area = &get_cpu_var(zs_map_area);
@@ -1158,7 +1210,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 
 	obj_handle_to_location(handle, &page, &obj_idx);
 	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = &pool->size_class[class_idx];
+	class = pool->size_class[class_idx];
 	off = obj_idx_to_offset(page, obj_idx, class->size);
 
 	area = this_cpu_ptr(&zs_map_area);