aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--mm/zsmalloc.c769
2 files changed, 654 insertions, 116 deletions
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index d829ce63529d..e398beac67b8 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -81,5 +81,6 @@
81/* Since UDF 2.01 is ISO 13346 based... */ 81/* Since UDF 2.01 is ISO 13346 based... */
82#define UDF_SUPER_MAGIC 0x15013346 82#define UDF_SUPER_MAGIC 0x15013346
83#define BALLOON_KVM_MAGIC 0x13661366 83#define BALLOON_KVM_MAGIC 0x13661366
84#define ZSMALLOC_MAGIC 0x58295829
84 85
85#endif /* __LINUX_MAGIC_H__ */ 86#endif /* __LINUX_MAGIC_H__ */
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c6fb543cfb98..04a4f063b4fd 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -17,14 +17,14 @@
17 * 17 *
18 * Usage of struct page fields: 18 * Usage of struct page fields:
19 * page->private: points to zspage 19 * page->private: points to zspage
20 * page->index: offset of the first object starting in this page. 20 * page->freelist(index): links together all component pages of a zspage
21 * For the first page, this is always 0, so we use this field 21 * For the huge page, this is always 0, so we use this field
22 * to store handle for huge object. 22 * to store handle.
23 * page->next: links together all component pages of a zspage
24 * 23 *
25 * Usage of struct page flags: 24 * Usage of struct page flags:
26 * PG_private: identifies the first component page 25 * PG_private: identifies the first component page
27 * PG_private2: identifies the last component page 26 * PG_private2: identifies the last component page
27 * PG_owner_priv_1: indentifies the huge component page
28 * 28 *
29 */ 29 */
30 30
@@ -49,6 +49,11 @@
49#include <linux/debugfs.h> 49#include <linux/debugfs.h>
50#include <linux/zsmalloc.h> 50#include <linux/zsmalloc.h>
51#include <linux/zpool.h> 51#include <linux/zpool.h>
52#include <linux/mount.h>
53#include <linux/compaction.h>
54#include <linux/pagemap.h>
55
56#define ZSPAGE_MAGIC 0x58
52 57
53/* 58/*
54 * This must be power of 2 and greater than of equal to sizeof(link_free). 59 * This must be power of 2 and greater than of equal to sizeof(link_free).
@@ -136,25 +141,23 @@
136 * We do not maintain any list for completely empty or full pages 141 * We do not maintain any list for completely empty or full pages
137 */ 142 */
138enum fullness_group { 143enum fullness_group {
139 ZS_ALMOST_FULL,
140 ZS_ALMOST_EMPTY,
141 ZS_EMPTY, 144 ZS_EMPTY,
142 ZS_FULL 145 ZS_ALMOST_EMPTY,
146 ZS_ALMOST_FULL,
147 ZS_FULL,
148 NR_ZS_FULLNESS,
143}; 149};
144 150
145enum zs_stat_type { 151enum zs_stat_type {
152 CLASS_EMPTY,
153 CLASS_ALMOST_EMPTY,
154 CLASS_ALMOST_FULL,
155 CLASS_FULL,
146 OBJ_ALLOCATED, 156 OBJ_ALLOCATED,
147 OBJ_USED, 157 OBJ_USED,
148 CLASS_ALMOST_FULL, 158 NR_ZS_STAT_TYPE,
149 CLASS_ALMOST_EMPTY,
150}; 159};
151 160
152#ifdef CONFIG_ZSMALLOC_STAT
153#define NR_ZS_STAT_TYPE (CLASS_ALMOST_EMPTY + 1)
154#else
155#define NR_ZS_STAT_TYPE (OBJ_USED + 1)
156#endif
157
158struct zs_size_stat { 161struct zs_size_stat {
159 unsigned long objs[NR_ZS_STAT_TYPE]; 162 unsigned long objs[NR_ZS_STAT_TYPE];
160}; 163};
@@ -163,6 +166,10 @@ struct zs_size_stat {
163static struct dentry *zs_stat_root; 166static struct dentry *zs_stat_root;
164#endif 167#endif
165 168
169#ifdef CONFIG_COMPACTION
170static struct vfsmount *zsmalloc_mnt;
171#endif
172
166/* 173/*
167 * number of size_classes 174 * number of size_classes
168 */ 175 */
@@ -186,23 +193,36 @@ static const int fullness_threshold_frac = 4;
186 193
187struct size_class { 194struct size_class {
188 spinlock_t lock; 195 spinlock_t lock;
189 struct list_head fullness_list[2]; 196 struct list_head fullness_list[NR_ZS_FULLNESS];
190 /* 197 /*
191 * Size of objects stored in this class. Must be multiple 198 * Size of objects stored in this class. Must be multiple
192 * of ZS_ALIGN. 199 * of ZS_ALIGN.
193 */ 200 */
194 int size; 201 int size;
195 int objs_per_zspage; 202 int objs_per_zspage;
196 unsigned int index;
197
198 struct zs_size_stat stats;
199
200 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ 203 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
201 int pages_per_zspage; 204 int pages_per_zspage;
202 /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ 205
203 bool huge; 206 unsigned int index;
207 struct zs_size_stat stats;
204}; 208};
205 209
210/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
211static void SetPageHugeObject(struct page *page)
212{
213 SetPageOwnerPriv1(page);
214}
215
216static void ClearPageHugeObject(struct page *page)
217{
218 ClearPageOwnerPriv1(page);
219}
220
221static int PageHugeObject(struct page *page)
222{
223 return PageOwnerPriv1(page);
224}
225
206/* 226/*
207 * Placed within free objects to form a singly linked list. 227 * Placed within free objects to form a singly linked list.
208 * For every zspage, zspage->freeobj gives head of this list. 228 * For every zspage, zspage->freeobj gives head of this list.
@@ -244,6 +264,10 @@ struct zs_pool {
244#ifdef CONFIG_ZSMALLOC_STAT 264#ifdef CONFIG_ZSMALLOC_STAT
245 struct dentry *stat_dentry; 265 struct dentry *stat_dentry;
246#endif 266#endif
267#ifdef CONFIG_COMPACTION
268 struct inode *inode;
269 struct work_struct free_work;
270#endif
247}; 271};
248 272
249/* 273/*
@@ -252,16 +276,23 @@ struct zs_pool {
252 */ 276 */
253#define FULLNESS_BITS 2 277#define FULLNESS_BITS 2
254#define CLASS_BITS 8 278#define CLASS_BITS 8
279#define ISOLATED_BITS 3
280#define MAGIC_VAL_BITS 8
255 281
256struct zspage { 282struct zspage {
257 struct { 283 struct {
258 unsigned int fullness:FULLNESS_BITS; 284 unsigned int fullness:FULLNESS_BITS;
259 unsigned int class:CLASS_BITS; 285 unsigned int class:CLASS_BITS;
286 unsigned int isolated:ISOLATED_BITS;
287 unsigned int magic:MAGIC_VAL_BITS;
260 }; 288 };
261 unsigned int inuse; 289 unsigned int inuse;
262 unsigned int freeobj; 290 unsigned int freeobj;
263 struct page *first_page; 291 struct page *first_page;
264 struct list_head list; /* fullness list */ 292 struct list_head list; /* fullness list */
293#ifdef CONFIG_COMPACTION
294 rwlock_t lock;
295#endif
265}; 296};
266 297
267struct mapping_area { 298struct mapping_area {
@@ -274,6 +305,28 @@ struct mapping_area {
274 enum zs_mapmode vm_mm; /* mapping mode */ 305 enum zs_mapmode vm_mm; /* mapping mode */
275}; 306};
276 307
308#ifdef CONFIG_COMPACTION
309static int zs_register_migration(struct zs_pool *pool);
310static void zs_unregister_migration(struct zs_pool *pool);
311static void migrate_lock_init(struct zspage *zspage);
312static void migrate_read_lock(struct zspage *zspage);
313static void migrate_read_unlock(struct zspage *zspage);
314static void kick_deferred_free(struct zs_pool *pool);
315static void init_deferred_free(struct zs_pool *pool);
316static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
317#else
318static int zsmalloc_mount(void) { return 0; }
319static void zsmalloc_unmount(void) {}
320static int zs_register_migration(struct zs_pool *pool) { return 0; }
321static void zs_unregister_migration(struct zs_pool *pool) {}
322static void migrate_lock_init(struct zspage *zspage) {}
323static void migrate_read_lock(struct zspage *zspage) {}
324static void migrate_read_unlock(struct zspage *zspage) {}
325static void kick_deferred_free(struct zs_pool *pool) {}
326static void init_deferred_free(struct zs_pool *pool) {}
327static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {}
328#endif
329
277static int create_cache(struct zs_pool *pool) 330static int create_cache(struct zs_pool *pool)
278{ 331{
279 pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, 332 pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
@@ -301,7 +354,7 @@ static void destroy_cache(struct zs_pool *pool)
301static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp) 354static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
302{ 355{
303 return (unsigned long)kmem_cache_alloc(pool->handle_cachep, 356 return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
304 gfp & ~__GFP_HIGHMEM); 357 gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
305} 358}
306 359
307static void cache_free_handle(struct zs_pool *pool, unsigned long handle) 360static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
@@ -311,7 +364,8 @@ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
311 364
312static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags) 365static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
313{ 366{
314 return kmem_cache_alloc(pool->zspage_cachep, flags & ~__GFP_HIGHMEM); 367 return kmem_cache_alloc(pool->zspage_cachep,
368 flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
315}; 369};
316 370
317static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) 371static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
@@ -421,11 +475,17 @@ static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
421/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 475/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
422static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 476static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
423 477
478static bool is_zspage_isolated(struct zspage *zspage)
479{
480 return zspage->isolated;
481}
482
424static int is_first_page(struct page *page) 483static int is_first_page(struct page *page)
425{ 484{
426 return PagePrivate(page); 485 return PagePrivate(page);
427} 486}
428 487
488/* Protected by class->lock */
429static inline int get_zspage_inuse(struct zspage *zspage) 489static inline int get_zspage_inuse(struct zspage *zspage)
430{ 490{
431 return zspage->inuse; 491 return zspage->inuse;
@@ -441,20 +501,22 @@ static inline void mod_zspage_inuse(struct zspage *zspage, int val)
441 zspage->inuse += val; 501 zspage->inuse += val;
442} 502}
443 503
444static inline int get_first_obj_offset(struct page *page) 504static inline struct page *get_first_page(struct zspage *zspage)
445{ 505{
446 if (is_first_page(page)) 506 struct page *first_page = zspage->first_page;
447 return 0;
448 507
449 return page->index; 508 VM_BUG_ON_PAGE(!is_first_page(first_page), first_page);
509 return first_page;
450} 510}
451 511
452static inline void set_first_obj_offset(struct page *page, int offset) 512static inline int get_first_obj_offset(struct page *page)
453{ 513{
454 if (is_first_page(page)) 514 return page->units;
455 return; 515}
456 516
457 page->index = offset; 517static inline void set_first_obj_offset(struct page *page, int offset)
518{
519 page->units = offset;
458} 520}
459 521
460static inline unsigned int get_freeobj(struct zspage *zspage) 522static inline unsigned int get_freeobj(struct zspage *zspage)
@@ -471,6 +533,8 @@ static void get_zspage_mapping(struct zspage *zspage,
471 unsigned int *class_idx, 533 unsigned int *class_idx,
472 enum fullness_group *fullness) 534 enum fullness_group *fullness)
473{ 535{
536 BUG_ON(zspage->magic != ZSPAGE_MAGIC);
537
474 *fullness = zspage->fullness; 538 *fullness = zspage->fullness;
475 *class_idx = zspage->class; 539 *class_idx = zspage->class;
476} 540}
@@ -504,23 +568,19 @@ static int get_size_class_index(int size)
504static inline void zs_stat_inc(struct size_class *class, 568static inline void zs_stat_inc(struct size_class *class,
505 enum zs_stat_type type, unsigned long cnt) 569 enum zs_stat_type type, unsigned long cnt)
506{ 570{
507 if (type < NR_ZS_STAT_TYPE) 571 class->stats.objs[type] += cnt;
508 class->stats.objs[type] += cnt;
509} 572}
510 573
511static inline void zs_stat_dec(struct size_class *class, 574static inline void zs_stat_dec(struct size_class *class,
512 enum zs_stat_type type, unsigned long cnt) 575 enum zs_stat_type type, unsigned long cnt)
513{ 576{
514 if (type < NR_ZS_STAT_TYPE) 577 class->stats.objs[type] -= cnt;
515 class->stats.objs[type] -= cnt;
516} 578}
517 579
518static inline unsigned long zs_stat_get(struct size_class *class, 580static inline unsigned long zs_stat_get(struct size_class *class,
519 enum zs_stat_type type) 581 enum zs_stat_type type)
520{ 582{
521 if (type < NR_ZS_STAT_TYPE) 583 return class->stats.objs[type];
522 return class->stats.objs[type];
523 return 0;
524} 584}
525 585
526#ifdef CONFIG_ZSMALLOC_STAT 586#ifdef CONFIG_ZSMALLOC_STAT
@@ -664,6 +724,7 @@ static inline void zs_pool_stat_destroy(struct zs_pool *pool)
664} 724}
665#endif 725#endif
666 726
727
667/* 728/*
668 * For each size class, zspages are divided into different groups 729 * For each size class, zspages are divided into different groups
669 * depending on how "full" they are. This was done so that we could 730 * depending on how "full" they are. This was done so that we could
@@ -704,15 +765,9 @@ static void insert_zspage(struct size_class *class,
704{ 765{
705 struct zspage *head; 766 struct zspage *head;
706 767
707 if (fullness >= ZS_EMPTY) 768 zs_stat_inc(class, fullness, 1);
708 return;
709
710 head = list_first_entry_or_null(&class->fullness_list[fullness], 769 head = list_first_entry_or_null(&class->fullness_list[fullness],
711 struct zspage, list); 770 struct zspage, list);
712
713 zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?
714 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
715
716 /* 771 /*
717 * We want to see more ZS_FULL pages and less almost empty/full. 772 * We want to see more ZS_FULL pages and less almost empty/full.
718 * Put pages with higher ->inuse first. 773 * Put pages with higher ->inuse first.
@@ -734,14 +789,11 @@ static void remove_zspage(struct size_class *class,
734 struct zspage *zspage, 789 struct zspage *zspage,
735 enum fullness_group fullness) 790 enum fullness_group fullness)
736{ 791{
737 if (fullness >= ZS_EMPTY)
738 return;
739
740 VM_BUG_ON(list_empty(&class->fullness_list[fullness])); 792 VM_BUG_ON(list_empty(&class->fullness_list[fullness]));
793 VM_BUG_ON(is_zspage_isolated(zspage));
741 794
742 list_del_init(&zspage->list); 795 list_del_init(&zspage->list);
743 zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ? 796 zs_stat_dec(class, fullness, 1);
744 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
745} 797}
746 798
747/* 799/*
@@ -764,8 +816,11 @@ static enum fullness_group fix_fullness_group(struct size_class *class,
764 if (newfg == currfg) 816 if (newfg == currfg)
765 goto out; 817 goto out;
766 818
767 remove_zspage(class, zspage, currfg); 819 if (!is_zspage_isolated(zspage)) {
768 insert_zspage(class, zspage, newfg); 820 remove_zspage(class, zspage, currfg);
821 insert_zspage(class, zspage, newfg);
822 }
823
769 set_zspage_mapping(zspage, class_idx, newfg); 824 set_zspage_mapping(zspage, class_idx, newfg);
770 825
771out: 826out:
@@ -808,19 +863,20 @@ static int get_pages_per_zspage(int class_size)
808 return max_usedpc_order; 863 return max_usedpc_order;
809} 864}
810 865
811static struct page *get_first_page(struct zspage *zspage)
812{
813 return zspage->first_page;
814}
815
816static struct zspage *get_zspage(struct page *page) 866static struct zspage *get_zspage(struct page *page)
817{ 867{
818 return (struct zspage *)page->private; 868 struct zspage *zspage = (struct zspage *)page->private;
869
870 BUG_ON(zspage->magic != ZSPAGE_MAGIC);
871 return zspage;
819} 872}
820 873
821static struct page *get_next_page(struct page *page) 874static struct page *get_next_page(struct page *page)
822{ 875{
823 return page->next; 876 if (unlikely(PageHugeObject(page)))
877 return NULL;
878
879 return page->freelist;
824} 880}
825 881
826/** 882/**
@@ -857,16 +913,20 @@ static unsigned long handle_to_obj(unsigned long handle)
857 return *(unsigned long *)handle; 913 return *(unsigned long *)handle;
858} 914}
859 915
860static unsigned long obj_to_head(struct size_class *class, struct page *page, 916static unsigned long obj_to_head(struct page *page, void *obj)
861 void *obj)
862{ 917{
863 if (class->huge) { 918 if (unlikely(PageHugeObject(page))) {
864 VM_BUG_ON_PAGE(!is_first_page(page), page); 919 VM_BUG_ON_PAGE(!is_first_page(page), page);
865 return page->index; 920 return page->index;
866 } else 921 } else
867 return *(unsigned long *)obj; 922 return *(unsigned long *)obj;
868} 923}
869 924
925static inline int testpin_tag(unsigned long handle)
926{
927 return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
928}
929
870static inline int trypin_tag(unsigned long handle) 930static inline int trypin_tag(unsigned long handle)
871{ 931{
872 return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); 932 return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
@@ -884,27 +944,94 @@ static void unpin_tag(unsigned long handle)
884 944
885static void reset_page(struct page *page) 945static void reset_page(struct page *page)
886{ 946{
947 __ClearPageMovable(page);
887 clear_bit(PG_private, &page->flags); 948 clear_bit(PG_private, &page->flags);
888 clear_bit(PG_private_2, &page->flags); 949 clear_bit(PG_private_2, &page->flags);
889 set_page_private(page, 0); 950 set_page_private(page, 0);
890 page->index = 0; 951 page_mapcount_reset(page);
952 ClearPageHugeObject(page);
953 page->freelist = NULL;
954}
955
956/*
957 * To prevent zspage destroy during migration, zspage freeing should
958 * hold locks of all pages in the zspage.
959 */
960void lock_zspage(struct zspage *zspage)
961{
962 struct page *page = get_first_page(zspage);
963
964 do {
965 lock_page(page);
966 } while ((page = get_next_page(page)) != NULL);
967}
968
969int trylock_zspage(struct zspage *zspage)
970{
971 struct page *cursor, *fail;
972
973 for (cursor = get_first_page(zspage); cursor != NULL; cursor =
974 get_next_page(cursor)) {
975 if (!trylock_page(cursor)) {
976 fail = cursor;
977 goto unlock;
978 }
979 }
980
981 return 1;
982unlock:
983 for (cursor = get_first_page(zspage); cursor != fail; cursor =
984 get_next_page(cursor))
985 unlock_page(cursor);
986
987 return 0;
891} 988}
892 989
893static void free_zspage(struct zs_pool *pool, struct zspage *zspage) 990static void __free_zspage(struct zs_pool *pool, struct size_class *class,
991 struct zspage *zspage)
894{ 992{
895 struct page *page, *next; 993 struct page *page, *next;
994 enum fullness_group fg;
995 unsigned int class_idx;
996
997 get_zspage_mapping(zspage, &class_idx, &fg);
998
999 assert_spin_locked(&class->lock);
896 1000
897 VM_BUG_ON(get_zspage_inuse(zspage)); 1001 VM_BUG_ON(get_zspage_inuse(zspage));
1002 VM_BUG_ON(fg != ZS_EMPTY);
898 1003
899 next = page = zspage->first_page; 1004 next = page = get_first_page(zspage);
900 do { 1005 do {
901 next = page->next; 1006 VM_BUG_ON_PAGE(!PageLocked(page), page);
1007 next = get_next_page(page);
902 reset_page(page); 1008 reset_page(page);
1009 unlock_page(page);
903 put_page(page); 1010 put_page(page);
904 page = next; 1011 page = next;
905 } while (page != NULL); 1012 } while (page != NULL);
906 1013
907 cache_free_zspage(pool, zspage); 1014 cache_free_zspage(pool, zspage);
1015
1016 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1017 class->size, class->pages_per_zspage));
1018 atomic_long_sub(class->pages_per_zspage,
1019 &pool->pages_allocated);
1020}
1021
1022static void free_zspage(struct zs_pool *pool, struct size_class *class,
1023 struct zspage *zspage)
1024{
1025 VM_BUG_ON(get_zspage_inuse(zspage));
1026 VM_BUG_ON(list_empty(&zspage->list));
1027
1028 if (!trylock_zspage(zspage)) {
1029 kick_deferred_free(pool);
1030 return;
1031 }
1032
1033 remove_zspage(class, zspage, ZS_EMPTY);
1034 __free_zspage(pool, class, zspage);
908} 1035}
909 1036
910/* Initialize a newly allocated zspage */ 1037/* Initialize a newly allocated zspage */
@@ -912,7 +1039,7 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
912{ 1039{
913 unsigned int freeobj = 1; 1040 unsigned int freeobj = 1;
914 unsigned long off = 0; 1041 unsigned long off = 0;
915 struct page *page = zspage->first_page; 1042 struct page *page = get_first_page(zspage);
916 1043
917 while (page) { 1044 while (page) {
918 struct page *next_page; 1045 struct page *next_page;
@@ -952,16 +1079,17 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
952 set_freeobj(zspage, 0); 1079 set_freeobj(zspage, 0);
953} 1080}
954 1081
955static void create_page_chain(struct zspage *zspage, struct page *pages[], 1082static void create_page_chain(struct size_class *class, struct zspage *zspage,
956 int nr_pages) 1083 struct page *pages[])
957{ 1084{
958 int i; 1085 int i;
959 struct page *page; 1086 struct page *page;
960 struct page *prev_page = NULL; 1087 struct page *prev_page = NULL;
1088 int nr_pages = class->pages_per_zspage;
961 1089
962 /* 1090 /*
963 * Allocate individual pages and link them together as: 1091 * Allocate individual pages and link them together as:
964 * 1. all pages are linked together using page->next 1092 * 1. all pages are linked together using page->freelist
965 * 2. each sub-page point to zspage using page->private 1093 * 2. each sub-page point to zspage using page->private
966 * 1094 *
967 * we set PG_private to identify the first page (i.e. no other sub-page 1095 * we set PG_private to identify the first page (i.e. no other sub-page
@@ -970,16 +1098,18 @@ static void create_page_chain(struct zspage *zspage, struct page *pages[],
970 for (i = 0; i < nr_pages; i++) { 1098 for (i = 0; i < nr_pages; i++) {
971 page = pages[i]; 1099 page = pages[i];
972 set_page_private(page, (unsigned long)zspage); 1100 set_page_private(page, (unsigned long)zspage);
1101 page->freelist = NULL;
973 if (i == 0) { 1102 if (i == 0) {
974 zspage->first_page = page; 1103 zspage->first_page = page;
975 SetPagePrivate(page); 1104 SetPagePrivate(page);
1105 if (unlikely(class->objs_per_zspage == 1 &&
1106 class->pages_per_zspage == 1))
1107 SetPageHugeObject(page);
976 } else { 1108 } else {
977 prev_page->next = page; 1109 prev_page->freelist = page;
978 } 1110 }
979 if (i == nr_pages - 1) { 1111 if (i == nr_pages - 1)
980 SetPagePrivate2(page); 1112 SetPagePrivate2(page);
981 page->next = NULL;
982 }
983 prev_page = page; 1113 prev_page = page;
984 } 1114 }
985} 1115}
@@ -999,6 +1129,8 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
999 return NULL; 1129 return NULL;
1000 1130
1001 memset(zspage, 0, sizeof(struct zspage)); 1131 memset(zspage, 0, sizeof(struct zspage));
1132 zspage->magic = ZSPAGE_MAGIC;
1133 migrate_lock_init(zspage);
1002 1134
1003 for (i = 0; i < class->pages_per_zspage; i++) { 1135 for (i = 0; i < class->pages_per_zspage; i++) {
1004 struct page *page; 1136 struct page *page;
@@ -1013,7 +1145,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
1013 pages[i] = page; 1145 pages[i] = page;
1014 } 1146 }
1015 1147
1016 create_page_chain(zspage, pages, class->pages_per_zspage); 1148 create_page_chain(class, zspage, pages);
1017 init_zspage(class, zspage); 1149 init_zspage(class, zspage);
1018 1150
1019 return zspage; 1151 return zspage;
@@ -1024,7 +1156,7 @@ static struct zspage *find_get_zspage(struct size_class *class)
1024 int i; 1156 int i;
1025 struct zspage *zspage; 1157 struct zspage *zspage;
1026 1158
1027 for (i = ZS_ALMOST_FULL; i <= ZS_ALMOST_EMPTY; i++) { 1159 for (i = ZS_ALMOST_FULL; i >= ZS_EMPTY; i--) {
1028 zspage = list_first_entry_or_null(&class->fullness_list[i], 1160 zspage = list_first_entry_or_null(&class->fullness_list[i],
1029 struct zspage, list); 1161 struct zspage, list);
1030 if (zspage) 1162 if (zspage)
@@ -1289,6 +1421,10 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1289 obj = handle_to_obj(handle); 1421 obj = handle_to_obj(handle);
1290 obj_to_location(obj, &page, &obj_idx); 1422 obj_to_location(obj, &page, &obj_idx);
1291 zspage = get_zspage(page); 1423 zspage = get_zspage(page);
1424
1425 /* migration cannot move any subpage in this zspage */
1426 migrate_read_lock(zspage);
1427
1292 get_zspage_mapping(zspage, &class_idx, &fg); 1428 get_zspage_mapping(zspage, &class_idx, &fg);
1293 class = pool->size_class[class_idx]; 1429 class = pool->size_class[class_idx];
1294 off = (class->size * obj_idx) & ~PAGE_MASK; 1430 off = (class->size * obj_idx) & ~PAGE_MASK;
@@ -1309,7 +1445,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1309 1445
1310 ret = __zs_map_object(area, pages, off, class->size); 1446 ret = __zs_map_object(area, pages, off, class->size);
1311out: 1447out:
1312 if (!class->huge) 1448 if (likely(!PageHugeObject(page)))
1313 ret += ZS_HANDLE_SIZE; 1449 ret += ZS_HANDLE_SIZE;
1314 1450
1315 return ret; 1451 return ret;
@@ -1348,6 +1484,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1348 __zs_unmap_object(area, pages, off, class->size); 1484 __zs_unmap_object(area, pages, off, class->size);
1349 } 1485 }
1350 put_cpu_var(zs_map_area); 1486 put_cpu_var(zs_map_area);
1487
1488 migrate_read_unlock(zspage);
1351 unpin_tag(handle); 1489 unpin_tag(handle);
1352} 1490}
1353EXPORT_SYMBOL_GPL(zs_unmap_object); 1491EXPORT_SYMBOL_GPL(zs_unmap_object);
@@ -1377,7 +1515,7 @@ static unsigned long obj_malloc(struct size_class *class,
1377 vaddr = kmap_atomic(m_page); 1515 vaddr = kmap_atomic(m_page);
1378 link = (struct link_free *)vaddr + m_offset / sizeof(*link); 1516 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1379 set_freeobj(zspage, link->next >> OBJ_ALLOCATED_TAG); 1517 set_freeobj(zspage, link->next >> OBJ_ALLOCATED_TAG);
1380 if (!class->huge) 1518 if (likely(!PageHugeObject(m_page)))
1381 /* record handle in the header of allocated chunk */ 1519 /* record handle in the header of allocated chunk */
1382 link->handle = handle; 1520 link->handle = handle;
1383 else 1521 else
@@ -1407,6 +1545,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
1407{ 1545{
1408 unsigned long handle, obj; 1546 unsigned long handle, obj;
1409 struct size_class *class; 1547 struct size_class *class;
1548 enum fullness_group newfg;
1410 struct zspage *zspage; 1549 struct zspage *zspage;
1411 1550
1412 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) 1551 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
@@ -1422,28 +1561,37 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
1422 1561
1423 spin_lock(&class->lock); 1562 spin_lock(&class->lock);
1424 zspage = find_get_zspage(class); 1563 zspage = find_get_zspage(class);
1425 1564 if (likely(zspage)) {
1426 if (!zspage) { 1565 obj = obj_malloc(class, zspage, handle);
1566 /* Now move the zspage to another fullness group, if required */
1567 fix_fullness_group(class, zspage);
1568 record_obj(handle, obj);
1427 spin_unlock(&class->lock); 1569 spin_unlock(&class->lock);
1428 zspage = alloc_zspage(pool, class, gfp);
1429 if (unlikely(!zspage)) {
1430 cache_free_handle(pool, handle);
1431 return 0;
1432 }
1433 1570
1434 set_zspage_mapping(zspage, class->index, ZS_EMPTY); 1571 return handle;
1435 atomic_long_add(class->pages_per_zspage, 1572 }
1436 &pool->pages_allocated);
1437 1573
1438 spin_lock(&class->lock); 1574 spin_unlock(&class->lock);
1439 zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage( 1575
1440 class->size, class->pages_per_zspage)); 1576 zspage = alloc_zspage(pool, class, gfp);
1577 if (!zspage) {
1578 cache_free_handle(pool, handle);
1579 return 0;
1441 } 1580 }
1442 1581
1582 spin_lock(&class->lock);
1443 obj = obj_malloc(class, zspage, handle); 1583 obj = obj_malloc(class, zspage, handle);
1444 /* Now move the zspage to another fullness group, if required */ 1584 newfg = get_fullness_group(class, zspage);
1445 fix_fullness_group(class, zspage); 1585 insert_zspage(class, zspage, newfg);
1586 set_zspage_mapping(zspage, class->index, newfg);
1446 record_obj(handle, obj); 1587 record_obj(handle, obj);
1588 atomic_long_add(class->pages_per_zspage,
1589 &pool->pages_allocated);
1590 zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1591 class->size, class->pages_per_zspage));
1592
1593 /* We completely set up zspage so mark them as movable */
1594 SetZsPageMovable(pool, zspage);
1447 spin_unlock(&class->lock); 1595 spin_unlock(&class->lock);
1448 1596
1449 return handle; 1597 return handle;
@@ -1484,6 +1632,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
1484 int class_idx; 1632 int class_idx;
1485 struct size_class *class; 1633 struct size_class *class;
1486 enum fullness_group fullness; 1634 enum fullness_group fullness;
1635 bool isolated;
1487 1636
1488 if (unlikely(!handle)) 1637 if (unlikely(!handle))
1489 return; 1638 return;
@@ -1493,22 +1642,28 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
1493 obj_to_location(obj, &f_page, &f_objidx); 1642 obj_to_location(obj, &f_page, &f_objidx);
1494 zspage = get_zspage(f_page); 1643 zspage = get_zspage(f_page);
1495 1644
1645 migrate_read_lock(zspage);
1646
1496 get_zspage_mapping(zspage, &class_idx, &fullness); 1647 get_zspage_mapping(zspage, &class_idx, &fullness);
1497 class = pool->size_class[class_idx]; 1648 class = pool->size_class[class_idx];
1498 1649
1499 spin_lock(&class->lock); 1650 spin_lock(&class->lock);
1500 obj_free(class, obj); 1651 obj_free(class, obj);
1501 fullness = fix_fullness_group(class, zspage); 1652 fullness = fix_fullness_group(class, zspage);
1502 if (fullness == ZS_EMPTY) { 1653 if (fullness != ZS_EMPTY) {
1503 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( 1654 migrate_read_unlock(zspage);
1504 class->size, class->pages_per_zspage)); 1655 goto out;
1505 atomic_long_sub(class->pages_per_zspage,
1506 &pool->pages_allocated);
1507 free_zspage(pool, zspage);
1508 } 1656 }
1657
1658 isolated = is_zspage_isolated(zspage);
1659 migrate_read_unlock(zspage);
1660 /* If zspage is isolated, zs_page_putback will free the zspage */
1661 if (likely(!isolated))
1662 free_zspage(pool, class, zspage);
1663out:
1664
1509 spin_unlock(&class->lock); 1665 spin_unlock(&class->lock);
1510 unpin_tag(handle); 1666 unpin_tag(handle);
1511
1512 cache_free_handle(pool, handle); 1667 cache_free_handle(pool, handle);
1513} 1668}
1514EXPORT_SYMBOL_GPL(zs_free); 1669EXPORT_SYMBOL_GPL(zs_free);
@@ -1592,7 +1747,7 @@ static unsigned long find_alloced_obj(struct size_class *class,
1592 offset += class->size * index; 1747 offset += class->size * index;
1593 1748
1594 while (offset < PAGE_SIZE) { 1749 while (offset < PAGE_SIZE) {
1595 head = obj_to_head(class, page, addr + offset); 1750 head = obj_to_head(page, addr + offset);
1596 if (head & OBJ_ALLOCATED_TAG) { 1751 if (head & OBJ_ALLOCATED_TAG) {
1597 handle = head & ~OBJ_ALLOCATED_TAG; 1752 handle = head & ~OBJ_ALLOCATED_TAG;
1598 if (trypin_tag(handle)) 1753 if (trypin_tag(handle))
@@ -1684,6 +1839,7 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source)
1684 zspage = list_first_entry_or_null(&class->fullness_list[fg[i]], 1839 zspage = list_first_entry_or_null(&class->fullness_list[fg[i]],
1685 struct zspage, list); 1840 struct zspage, list);
1686 if (zspage) { 1841 if (zspage) {
1842 VM_BUG_ON(is_zspage_isolated(zspage));
1687 remove_zspage(class, zspage, fg[i]); 1843 remove_zspage(class, zspage, fg[i]);
1688 return zspage; 1844 return zspage;
1689 } 1845 }
@@ -1704,6 +1860,8 @@ static enum fullness_group putback_zspage(struct size_class *class,
1704{ 1860{
1705 enum fullness_group fullness; 1861 enum fullness_group fullness;
1706 1862
1863 VM_BUG_ON(is_zspage_isolated(zspage));
1864
1707 fullness = get_fullness_group(class, zspage); 1865 fullness = get_fullness_group(class, zspage);
1708 insert_zspage(class, zspage, fullness); 1866 insert_zspage(class, zspage, fullness);
1709 set_zspage_mapping(zspage, class->index, fullness); 1867 set_zspage_mapping(zspage, class->index, fullness);
@@ -1711,6 +1869,378 @@ static enum fullness_group putback_zspage(struct size_class *class,
1711 return fullness; 1869 return fullness;
1712} 1870}
1713 1871
1872#ifdef CONFIG_COMPACTION
1873static struct dentry *zs_mount(struct file_system_type *fs_type,
1874 int flags, const char *dev_name, void *data)
1875{
1876 static const struct dentry_operations ops = {
1877 .d_dname = simple_dname,
1878 };
1879
1880 return mount_pseudo(fs_type, "zsmalloc:", NULL, &ops, ZSMALLOC_MAGIC);
1881}
1882
1883static struct file_system_type zsmalloc_fs = {
1884 .name = "zsmalloc",
1885 .mount = zs_mount,
1886 .kill_sb = kill_anon_super,
1887};
1888
1889static int zsmalloc_mount(void)
1890{
1891 int ret = 0;
1892
1893 zsmalloc_mnt = kern_mount(&zsmalloc_fs);
1894 if (IS_ERR(zsmalloc_mnt))
1895 ret = PTR_ERR(zsmalloc_mnt);
1896
1897 return ret;
1898}
1899
1900static void zsmalloc_unmount(void)
1901{
1902 kern_unmount(zsmalloc_mnt);
1903}
1904
1905static void migrate_lock_init(struct zspage *zspage)
1906{
1907 rwlock_init(&zspage->lock);
1908}
1909
1910static void migrate_read_lock(struct zspage *zspage)
1911{
1912 read_lock(&zspage->lock);
1913}
1914
1915static void migrate_read_unlock(struct zspage *zspage)
1916{
1917 read_unlock(&zspage->lock);
1918}
1919
1920static void migrate_write_lock(struct zspage *zspage)
1921{
1922 write_lock(&zspage->lock);
1923}
1924
1925static void migrate_write_unlock(struct zspage *zspage)
1926{
1927 write_unlock(&zspage->lock);
1928}
1929
1930/* Number of isolated subpages for *page migration* in this zspage */
1931static void inc_zspage_isolation(struct zspage *zspage)
1932{
1933 zspage->isolated++;
1934}
1935
1936static void dec_zspage_isolation(struct zspage *zspage)
1937{
1938 zspage->isolated--;
1939}
1940
1941static void replace_sub_page(struct size_class *class, struct zspage *zspage,
1942 struct page *newpage, struct page *oldpage)
1943{
1944 struct page *page;
1945 struct page *pages[ZS_MAX_PAGES_PER_ZSPAGE] = {NULL, };
1946 int idx = 0;
1947
1948 page = get_first_page(zspage);
1949 do {
1950 if (page == oldpage)
1951 pages[idx] = newpage;
1952 else
1953 pages[idx] = page;
1954 idx++;
1955 } while ((page = get_next_page(page)) != NULL);
1956
1957 create_page_chain(class, zspage, pages);
1958 set_first_obj_offset(newpage, get_first_obj_offset(oldpage));
1959 if (unlikely(PageHugeObject(oldpage)))
1960 newpage->index = oldpage->index;
1961 __SetPageMovable(newpage, page_mapping(oldpage));
1962}
1963
1964bool zs_page_isolate(struct page *page, isolate_mode_t mode)
1965{
1966 struct zs_pool *pool;
1967 struct size_class *class;
1968 int class_idx;
1969 enum fullness_group fullness;
1970 struct zspage *zspage;
1971 struct address_space *mapping;
1972
1973 /*
1974	 * The page is locked, so the zspage cannot be destroyed. For details,
1975	 * see lock_zspage() in free_zspage().
1976 */
1977 VM_BUG_ON_PAGE(!PageMovable(page), page);
1978 VM_BUG_ON_PAGE(PageIsolated(page), page);
1979
1980 zspage = get_zspage(page);
1981
1982 /*
1983	 * Without the class lock, fullness could be stale while class_idx is okay
1984	 * because class_idx is constant unless the page is freed, so we should
1985	 * re-read fullness under the class lock.
1986 */
1987 get_zspage_mapping(zspage, &class_idx, &fullness);
1988 mapping = page_mapping(page);
1989 pool = mapping->private_data;
1990 class = pool->size_class[class_idx];
1991
1992 spin_lock(&class->lock);
1993 if (get_zspage_inuse(zspage) == 0) {
1994 spin_unlock(&class->lock);
1995 return false;
1996 }
1997
1998 /* zspage is isolated for object migration */
1999 if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
2000 spin_unlock(&class->lock);
2001 return false;
2002 }
2003
2004 /*
2005	 * If this is the first isolation of the zspage, isolate the zspage from
2006 * size_class to prevent further object allocation from the zspage.
2007 */
2008 if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
2009 get_zspage_mapping(zspage, &class_idx, &fullness);
2010 remove_zspage(class, zspage, fullness);
2011 }
2012
2013 inc_zspage_isolation(zspage);
2014 spin_unlock(&class->lock);
2015
2016 return true;
2017}
2018
2019int zs_page_migrate(struct address_space *mapping, struct page *newpage,
2020 struct page *page, enum migrate_mode mode)
2021{
2022 struct zs_pool *pool;
2023 struct size_class *class;
2024 int class_idx;
2025 enum fullness_group fullness;
2026 struct zspage *zspage;
2027 struct page *dummy;
2028 void *s_addr, *d_addr, *addr;
2029 int offset, pos;
2030 unsigned long handle, head;
2031 unsigned long old_obj, new_obj;
2032 unsigned int obj_idx;
2033 int ret = -EAGAIN;
2034
2035 VM_BUG_ON_PAGE(!PageMovable(page), page);
2036 VM_BUG_ON_PAGE(!PageIsolated(page), page);
2037
2038 zspage = get_zspage(page);
2039
2040 /* Concurrent compactor cannot migrate any subpage in zspage */
2041 migrate_write_lock(zspage);
2042 get_zspage_mapping(zspage, &class_idx, &fullness);
2043 pool = mapping->private_data;
2044 class = pool->size_class[class_idx];
2045 offset = get_first_obj_offset(page);
2046
2047 spin_lock(&class->lock);
2048 if (!get_zspage_inuse(zspage)) {
2049 ret = -EBUSY;
2050 goto unlock_class;
2051 }
2052
2053 pos = offset;
2054 s_addr = kmap_atomic(page);
2055 while (pos < PAGE_SIZE) {
2056 head = obj_to_head(page, s_addr + pos);
2057 if (head & OBJ_ALLOCATED_TAG) {
2058 handle = head & ~OBJ_ALLOCATED_TAG;
2059 if (!trypin_tag(handle))
2060 goto unpin_objects;
2061 }
2062 pos += class->size;
2063 }
2064
2065 /*
2066	 * Here, no user can access any object in the zspage, so it is safe to move.
2067 */
2068 d_addr = kmap_atomic(newpage);
2069 memcpy(d_addr, s_addr, PAGE_SIZE);
2070 kunmap_atomic(d_addr);
2071
2072 for (addr = s_addr + offset; addr < s_addr + pos;
2073 addr += class->size) {
2074 head = obj_to_head(page, addr);
2075 if (head & OBJ_ALLOCATED_TAG) {
2076 handle = head & ~OBJ_ALLOCATED_TAG;
2077 if (!testpin_tag(handle))
2078 BUG();
2079
2080 old_obj = handle_to_obj(handle);
2081 obj_to_location(old_obj, &dummy, &obj_idx);
2082 new_obj = (unsigned long)location_to_obj(newpage,
2083 obj_idx);
2084 new_obj |= BIT(HANDLE_PIN_BIT);
2085 record_obj(handle, new_obj);
2086 }
2087 }
2088
2089 replace_sub_page(class, zspage, newpage, page);
2090 get_page(newpage);
2091
2092 dec_zspage_isolation(zspage);
2093
2094 /*
2095	 * Page migration is done, so put the isolated zspage back on
2096	 * the list if @page is the final isolated subpage in the zspage.
2097 */
2098 if (!is_zspage_isolated(zspage))
2099 putback_zspage(class, zspage);
2100
2101 reset_page(page);
2102 put_page(page);
2103 page = newpage;
2104
2105 ret = 0;
2106unpin_objects:
2107 for (addr = s_addr + offset; addr < s_addr + pos;
2108 addr += class->size) {
2109 head = obj_to_head(page, addr);
2110 if (head & OBJ_ALLOCATED_TAG) {
2111 handle = head & ~OBJ_ALLOCATED_TAG;
2112 if (!testpin_tag(handle))
2113 BUG();
2114 unpin_tag(handle);
2115 }
2116 }
2117 kunmap_atomic(s_addr);
2118unlock_class:
2119 spin_unlock(&class->lock);
2120 migrate_write_unlock(zspage);
2121
2122 return ret;
2123}
2124
2125void zs_page_putback(struct page *page)
2126{
2127 struct zs_pool *pool;
2128 struct size_class *class;
2129 int class_idx;
2130 enum fullness_group fg;
2131 struct address_space *mapping;
2132 struct zspage *zspage;
2133
2134 VM_BUG_ON_PAGE(!PageMovable(page), page);
2135 VM_BUG_ON_PAGE(!PageIsolated(page), page);
2136
2137 zspage = get_zspage(page);
2138 get_zspage_mapping(zspage, &class_idx, &fg);
2139 mapping = page_mapping(page);
2140 pool = mapping->private_data;
2141 class = pool->size_class[class_idx];
2142
2143 spin_lock(&class->lock);
2144 dec_zspage_isolation(zspage);
2145 if (!is_zspage_isolated(zspage)) {
2146 fg = putback_zspage(class, zspage);
2147 /*
2148	 * Due to page_lock, we cannot free the zspage immediately,
2149	 * so defer the free.
2150 */
2151 if (fg == ZS_EMPTY)
2152 schedule_work(&pool->free_work);
2153 }
2154 spin_unlock(&class->lock);
2155}
2156
2157const struct address_space_operations zsmalloc_aops = {
2158 .isolate_page = zs_page_isolate,
2159 .migratepage = zs_page_migrate,
2160 .putback_page = zs_page_putback,
2161};
2162
2163static int zs_register_migration(struct zs_pool *pool)
2164{
2165 pool->inode = alloc_anon_inode(zsmalloc_mnt->mnt_sb);
2166 if (IS_ERR(pool->inode)) {
2167 pool->inode = NULL;
2168 return 1;
2169 }
2170
2171 pool->inode->i_mapping->private_data = pool;
2172 pool->inode->i_mapping->a_ops = &zsmalloc_aops;
2173 return 0;
2174}
2175
2176static void zs_unregister_migration(struct zs_pool *pool)
2177{
2178 flush_work(&pool->free_work);
2179 if (pool->inode)
2180 iput(pool->inode);
2181}
2182
2183/*
2184 * Caller should hold page_lock of all pages in the zspage
2186 * Here, we cannot use zspage metadata.
2186 */
2187static void async_free_zspage(struct work_struct *work)
2188{
2189 int i;
2190 struct size_class *class;
2191 unsigned int class_idx;
2192 enum fullness_group fullness;
2193 struct zspage *zspage, *tmp;
2194 LIST_HEAD(free_pages);
2195 struct zs_pool *pool = container_of(work, struct zs_pool,
2196 free_work);
2197
2198 for (i = 0; i < zs_size_classes; i++) {
2199 class = pool->size_class[i];
2200 if (class->index != i)
2201 continue;
2202
2203 spin_lock(&class->lock);
2204 list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
2205 spin_unlock(&class->lock);
2206 }
2207
2208
2209 list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
2210 list_del(&zspage->list);
2211 lock_zspage(zspage);
2212
2213 get_zspage_mapping(zspage, &class_idx, &fullness);
2214 VM_BUG_ON(fullness != ZS_EMPTY);
2215 class = pool->size_class[class_idx];
2216 spin_lock(&class->lock);
2217 __free_zspage(pool, pool->size_class[class_idx], zspage);
2218 spin_unlock(&class->lock);
2219 }
2220};
2221
2222static void kick_deferred_free(struct zs_pool *pool)
2223{
2224 schedule_work(&pool->free_work);
2225}
2226
2227static void init_deferred_free(struct zs_pool *pool)
2228{
2229 INIT_WORK(&pool->free_work, async_free_zspage);
2230}
2231
2232static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage)
2233{
2234 struct page *page = get_first_page(zspage);
2235
2236 do {
2237 WARN_ON(!trylock_page(page));
2238 __SetPageMovable(page, pool->inode->i_mapping);
2239 unlock_page(page);
2240 } while ((page = get_next_page(page)) != NULL);
2241}
2242#endif
2243
1714/* 2244/*
1715 * 2245 *
1716 * Based on the number of unused allocated objects calculate 2246 * Based on the number of unused allocated objects calculate
@@ -1745,10 +2275,10 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
1745 break; 2275 break;
1746 2276
1747 cc.index = 0; 2277 cc.index = 0;
1748 cc.s_page = src_zspage->first_page; 2278 cc.s_page = get_first_page(src_zspage);
1749 2279
1750 while ((dst_zspage = isolate_zspage(class, false))) { 2280 while ((dst_zspage = isolate_zspage(class, false))) {
1751 cc.d_page = dst_zspage->first_page; 2281 cc.d_page = get_first_page(dst_zspage);
1752 /* 2282 /*
1753 * If there is no more space in dst_page, resched 2283 * If there is no more space in dst_page, resched
1754 * and see if anyone had allocated another zspage. 2284 * and see if anyone had allocated another zspage.
@@ -1765,11 +2295,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
1765 2295
1766 putback_zspage(class, dst_zspage); 2296 putback_zspage(class, dst_zspage);
1767 if (putback_zspage(class, src_zspage) == ZS_EMPTY) { 2297 if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
1768 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( 2298 free_zspage(pool, class, src_zspage);
1769 class->size, class->pages_per_zspage));
1770 atomic_long_sub(class->pages_per_zspage,
1771 &pool->pages_allocated);
1772 free_zspage(pool, src_zspage);
1773 pool->stats.pages_compacted += class->pages_per_zspage; 2299 pool->stats.pages_compacted += class->pages_per_zspage;
1774 } 2300 }
1775 spin_unlock(&class->lock); 2301 spin_unlock(&class->lock);
@@ -1885,6 +2411,7 @@ struct zs_pool *zs_create_pool(const char *name)
1885 if (!pool) 2411 if (!pool)
1886 return NULL; 2412 return NULL;
1887 2413
2414 init_deferred_free(pool);
1888 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), 2415 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
1889 GFP_KERNEL); 2416 GFP_KERNEL);
1890 if (!pool->size_class) { 2417 if (!pool->size_class) {
@@ -1939,12 +2466,10 @@ struct zs_pool *zs_create_pool(const char *name)
1939 class->pages_per_zspage = pages_per_zspage; 2466 class->pages_per_zspage = pages_per_zspage;
1940 class->objs_per_zspage = class->pages_per_zspage * 2467 class->objs_per_zspage = class->pages_per_zspage *
1941 PAGE_SIZE / class->size; 2468 PAGE_SIZE / class->size;
1942 if (pages_per_zspage == 1 && class->objs_per_zspage == 1)
1943 class->huge = true;
1944 spin_lock_init(&class->lock); 2469 spin_lock_init(&class->lock);
1945 pool->size_class[i] = class; 2470 pool->size_class[i] = class;
1946 for (fullness = ZS_ALMOST_FULL; fullness <= ZS_ALMOST_EMPTY; 2471 for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
1947 fullness++) 2472 fullness++)
1948 INIT_LIST_HEAD(&class->fullness_list[fullness]); 2473 INIT_LIST_HEAD(&class->fullness_list[fullness]);
1949 2474
1950 prev_class = class; 2475 prev_class = class;
@@ -1953,6 +2478,9 @@ struct zs_pool *zs_create_pool(const char *name)
1953 /* debug only, don't abort if it fails */ 2478 /* debug only, don't abort if it fails */
1954 zs_pool_stat_create(pool, name); 2479 zs_pool_stat_create(pool, name);
1955 2480
2481 if (zs_register_migration(pool))
2482 goto err;
2483
1956 /* 2484 /*
1957 * Not critical, we still can use the pool 2485 * Not critical, we still can use the pool
1958 * and user can trigger compaction manually. 2486 * and user can trigger compaction manually.
@@ -1972,6 +2500,7 @@ void zs_destroy_pool(struct zs_pool *pool)
1972 int i; 2500 int i;
1973 2501
1974 zs_unregister_shrinker(pool); 2502 zs_unregister_shrinker(pool);
2503 zs_unregister_migration(pool);
1975 zs_pool_stat_destroy(pool); 2504 zs_pool_stat_destroy(pool);
1976 2505
1977 for (i = 0; i < zs_size_classes; i++) { 2506 for (i = 0; i < zs_size_classes; i++) {
@@ -1984,7 +2513,7 @@ void zs_destroy_pool(struct zs_pool *pool)
1984 if (class->index != i) 2513 if (class->index != i)
1985 continue; 2514 continue;
1986 2515
1987 for (fg = ZS_ALMOST_FULL; fg <= ZS_ALMOST_EMPTY; fg++) { 2516 for (fg = ZS_EMPTY; fg < NR_ZS_FULLNESS; fg++) {
1988 if (!list_empty(&class->fullness_list[fg])) { 2517 if (!list_empty(&class->fullness_list[fg])) {
1989 pr_info("Freeing non-empty class with size %db, fullness group %d\n", 2518 pr_info("Freeing non-empty class with size %db, fullness group %d\n",
1990 class->size, fg); 2519 class->size, fg);
@@ -2002,7 +2531,13 @@ EXPORT_SYMBOL_GPL(zs_destroy_pool);
2002 2531
2003static int __init zs_init(void) 2532static int __init zs_init(void)
2004{ 2533{
2005 int ret = zs_register_cpu_notifier(); 2534 int ret;
2535
2536 ret = zsmalloc_mount();
2537 if (ret)
2538 goto out;
2539
2540 ret = zs_register_cpu_notifier();
2006 2541
2007 if (ret) 2542 if (ret)
2008 goto notifier_fail; 2543 goto notifier_fail;
@@ -2019,7 +2554,8 @@ static int __init zs_init(void)
2019 2554
2020notifier_fail: 2555notifier_fail:
2021 zs_unregister_cpu_notifier(); 2556 zs_unregister_cpu_notifier();
2022 2557 zsmalloc_unmount();
2558out:
2023 return ret; 2559 return ret;
2024} 2560}
2025 2561
@@ -2028,6 +2564,7 @@ static void __exit zs_exit(void)
2028#ifdef CONFIG_ZPOOL 2564#ifdef CONFIG_ZPOOL
2029 zpool_unregister_driver(&zs_zpool_driver); 2565 zpool_unregister_driver(&zs_zpool_driver);
2030#endif 2566#endif
2567 zsmalloc_unmount();
2031 zs_unregister_cpu_notifier(); 2568 zs_unregister_cpu_notifier();
2032 2569
2033 zs_stat_exit(); 2570 zs_stat_exit();