Diffstat (limited to 'mm/zsmalloc.c')
-rw-r--r--  mm/zsmalloc.c  971
1 file changed, 707 insertions, 264 deletions
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 0dec1fa5f656..08bd7a3d464a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -12,35 +12,6 @@
12 */ 12 */
13 13
14/* 14/*
15 * This allocator is designed for use with zram. Thus, the allocator is
16 * supposed to work well under low memory conditions. In particular, it
17 * never attempts higher order page allocation which is very likely to
18 * fail under memory pressure. On the other hand, if we just use single
19 * (0-order) pages, it would suffer from very high fragmentation --
20 * any object of size PAGE_SIZE/2 or larger would occupy an entire page.
21 * This was one of the major issues with its predecessor (xvmalloc).
22 *
23 * To overcome these issues, zsmalloc allocates a bunch of 0-order pages
24 * and links them together using various 'struct page' fields. These linked
25 * pages act as a single higher-order page i.e. an object can span 0-order
26 * page boundaries. The code refers to these linked pages as a single entity
27 * called zspage.
28 *
29 * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
30 * since this satisfies the requirements of all its current users (in the
31 * worst case, page is incompressible and is thus stored "as-is" i.e. in
32 * uncompressed form). For allocation requests larger than this size, failure
33 * is returned (see zs_malloc).
34 *
35 * Additionally, zs_malloc() does not return a dereferenceable pointer.
36 * Instead, it returns an opaque handle (unsigned long) which encodes actual
37 * location of the allocated object. The reason for this indirection is that
38 * zsmalloc does not keep zspages permanently mapped since that would cause
39 * issues on 32-bit systems where the VA region for kernel space mappings
40 * is very small. So, before using the allocating memory, the object has to
41 * be mapped using zs_map_object() to get a usable pointer and subsequently
42 * unmapped using zs_unmap_object().
43 *
44 * Following is how we use various fields and flags of underlying 15 * Following is how we use various fields and flags of underlying
45 * struct page(s) to form a zspage. 16 * struct page(s) to form a zspage.
46 * 17 *
@@ -57,6 +28,8 @@
57 * 28 *
58 * page->private (union with page->first_page): refers to the 29 * page->private (union with page->first_page): refers to the
59 * component page after the first page 30 * component page after the first page
31 * If the page is the first_page of a huge object, it stores the handle.
32 * Look at size_class->huge.
60 * page->freelist: points to the first free object in zspage. 33 * page->freelist: points to the first free object in zspage.
61 * Free objects are linked together using in-place 34 * Free objects are linked together using in-place
62 * metadata. 35 * metadata.
@@ -78,6 +51,7 @@
78 51
79#include <linux/module.h> 52#include <linux/module.h>
80#include <linux/kernel.h> 53#include <linux/kernel.h>
54#include <linux/sched.h>
81#include <linux/bitops.h> 55#include <linux/bitops.h>
82#include <linux/errno.h> 56#include <linux/errno.h>
83#include <linux/highmem.h> 57#include <linux/highmem.h>
@@ -110,6 +84,8 @@
110#define ZS_MAX_ZSPAGE_ORDER 2 84#define ZS_MAX_ZSPAGE_ORDER 2
111#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) 85#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
112 86
87#define ZS_HANDLE_SIZE (sizeof(unsigned long))
88
113/* 89/*
114 * Object location (<PFN>, <obj_idx>) is encoded as 90 * Object location (<PFN>, <obj_idx>) is encoded as
115 * as single (unsigned long) handle value. 91 * as single (unsigned long) handle value.
@@ -133,13 +109,33 @@
133#endif 109#endif
134#endif 110#endif
135#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) 111#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
136#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) 112
113/*
114 * The memory allocated for a handle stores the object's position,
115 * encoded as <page, obj_idx>; the encoding leaves the least
116 * significant bit free (see obj_to_location).
117 * We use that bit to synchronize object access between the
118 * user and migration.
119 */
120#define HANDLE_PIN_BIT 0
121
122/*
123 * The head word of an allocated object carries OBJ_ALLOCATED_TAG
124 * so that we can tell whether the object is allocated.
125 * It is safe to use the least significant bit for this because the
126 * header holds a handle, which is a 4-byte-aligned address, so we
127 * have room for at least two bits.
128 */
129#define OBJ_ALLOCATED_TAG 1
130#define OBJ_TAG_BITS 1
131#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
137#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) 132#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
138 133
139#define MAX(a, b) ((a) >= (b) ? (a) : (b)) 134#define MAX(a, b) ((a) >= (b) ? (a) : (b))
140/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ 135/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
141#define ZS_MIN_ALLOC_SIZE \ 136#define ZS_MIN_ALLOC_SIZE \
142 MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) 137 MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
138/* each chunk includes extra space to keep handle */
143#define ZS_MAX_ALLOC_SIZE PAGE_SIZE 139#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
144 140
145/* 141/*
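The defines above pack an object's location into a single word: the page frame number in the high bits, the object index below it, and the lowest bit left clear for tagging. A minimal userspace sketch of that arithmetic (not part of the patch; 64-bit longs, 4 KB pages and MAX_PHYSMEM_BITS = 46 are assumed here, the real values are per-architecture):

#include <stdio.h>

#define BITS_PER_LONG		64
#define PAGE_SHIFT		12
#define MAX_PHYSMEM_BITS	46
#define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
#define OBJ_TAG_BITS		1
#define OBJ_INDEX_BITS		(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
#define OBJ_INDEX_MASK		((1UL << OBJ_INDEX_BITS) - 1)

/* mirrors location_to_obj(): bit 0 stays free for the tag/pin bit */
static unsigned long encode(unsigned long pfn, unsigned long obj_idx)
{
	return ((pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK)) << OBJ_TAG_BITS;
}

int main(void)
{
	unsigned long obj = encode(0x12345, 7);

	/* mirrors obj_to_location(); prints "pfn=12345 idx=7" */
	printf("pfn=%lx idx=%lu\n", obj >> (OBJ_TAG_BITS + OBJ_INDEX_BITS),
	       (obj >> OBJ_TAG_BITS) & OBJ_INDEX_MASK);
	return 0;
}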
@@ -172,6 +168,8 @@ enum fullness_group {
172enum zs_stat_type { 168enum zs_stat_type {
173 OBJ_ALLOCATED, 169 OBJ_ALLOCATED,
174 OBJ_USED, 170 OBJ_USED,
171 CLASS_ALMOST_FULL,
172 CLASS_ALMOST_EMPTY,
175 NR_ZS_STAT_TYPE, 173 NR_ZS_STAT_TYPE,
176}; 174};
177 175
@@ -216,6 +214,8 @@ struct size_class {
216 214
217 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ 215 /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
218 int pages_per_zspage; 216 int pages_per_zspage;
217 /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
218 bool huge;
219 219
220#ifdef CONFIG_ZSMALLOC_STAT 220#ifdef CONFIG_ZSMALLOC_STAT
221 struct zs_size_stat stats; 221 struct zs_size_stat stats;
@@ -233,14 +233,24 @@ struct size_class {
233 * This must be power of 2 and less than or equal to ZS_ALIGN 233 * This must be power of 2 and less than or equal to ZS_ALIGN
234 */ 234 */
235struct link_free { 235struct link_free {
236 /* Handle of next free chunk (encodes <PFN, obj_idx>) */ 236 union {
237 void *next; 237 /*
238 * Position of next free chunk (encodes <PFN, obj_idx>)
239 * It's valid for non-allocated object
240 */
241 void *next;
242 /*
243 * Handle of allocated object.
244 */
245 unsigned long handle;
246 };
238}; 247};
239 248
240struct zs_pool { 249struct zs_pool {
241 char *name; 250 char *name;
242 251
243 struct size_class **size_class; 252 struct size_class **size_class;
253 struct kmem_cache *handle_cachep;
244 254
245 gfp_t flags; /* allocation flags used when growing pool */ 255 gfp_t flags; /* allocation flags used when growing pool */
246 atomic_long_t pages_allocated; 256 atomic_long_t pages_allocated;
@@ -267,8 +277,37 @@ struct mapping_area {
267#endif 277#endif
268 char *vm_addr; /* address of kmap_atomic()'ed pages */ 278 char *vm_addr; /* address of kmap_atomic()'ed pages */
269 enum zs_mapmode vm_mm; /* mapping mode */ 279 enum zs_mapmode vm_mm; /* mapping mode */
280 bool huge;
270}; 281};
271 282
283static int create_handle_cache(struct zs_pool *pool)
284{
285 pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
286 0, 0, NULL);
287 return pool->handle_cachep ? 0 : 1;
288}
289
290static void destroy_handle_cache(struct zs_pool *pool)
291{
292 kmem_cache_destroy(pool->handle_cachep);
293}
294
295static unsigned long alloc_handle(struct zs_pool *pool)
296{
297 return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
298 pool->flags & ~__GFP_HIGHMEM);
299}
300
301static void free_handle(struct zs_pool *pool, unsigned long handle)
302{
303 kmem_cache_free(pool->handle_cachep, (void *)handle);
304}
305
306static void record_obj(unsigned long handle, unsigned long obj)
307{
308 *(unsigned long *)handle = obj;
309}
310
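record_obj() is the heart of the new indirection: a handle is nothing but a separately allocated word that holds the encoded object location, so compaction can later move the object and rewrite that word while every user keeps the same handle value. A userspace sketch of the idea, with a plain malloc() standing in for the kmem_cache and made-up location values:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long handle = (unsigned long)malloc(sizeof(unsigned long));
	unsigned long old_location = 0x1230, new_location = 0x4560;

	*(unsigned long *)handle = old_location;	/* record_obj() at allocation */
	*(unsigned long *)handle = new_location;	/* record_obj() after migration */

	/* callers never notice the move; handle_to_obj() simply rereads the word */
	printf("object now lives at %lx\n", *(unsigned long *)handle);
	free((void *)handle);
	return 0;
}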
272/* zpool driver */ 311/* zpool driver */
273 312
274#ifdef CONFIG_ZPOOL 313#ifdef CONFIG_ZPOOL
@@ -346,6 +385,11 @@ static struct zpool_driver zs_zpool_driver = {
346MODULE_ALIAS("zpool-zsmalloc"); 385MODULE_ALIAS("zpool-zsmalloc");
347#endif /* CONFIG_ZPOOL */ 386#endif /* CONFIG_ZPOOL */
348 387
388static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
389{
390 return pages_per_zspage * PAGE_SIZE / size;
391}
392
349/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ 393/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
350static DEFINE_PER_CPU(struct mapping_area, zs_map_area); 394static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
351 395
@@ -396,9 +440,182 @@ static int get_size_class_index(int size)
396 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, 440 idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE,
397 ZS_SIZE_CLASS_DELTA); 441 ZS_SIZE_CLASS_DELTA);
398 442
399 return idx; 443 return min(zs_size_classes - 1, idx);
444}
445
446#ifdef CONFIG_ZSMALLOC_STAT
447
448static inline void zs_stat_inc(struct size_class *class,
449 enum zs_stat_type type, unsigned long cnt)
450{
451 class->stats.objs[type] += cnt;
452}
453
454static inline void zs_stat_dec(struct size_class *class,
455 enum zs_stat_type type, unsigned long cnt)
456{
457 class->stats.objs[type] -= cnt;
458}
459
460static inline unsigned long zs_stat_get(struct size_class *class,
461 enum zs_stat_type type)
462{
463 return class->stats.objs[type];
464}
465
466static int __init zs_stat_init(void)
467{
468 if (!debugfs_initialized())
469 return -ENODEV;
470
471 zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
472 if (!zs_stat_root)
473 return -ENOMEM;
474
475 return 0;
476}
477
478static void __exit zs_stat_exit(void)
479{
480 debugfs_remove_recursive(zs_stat_root);
481}
482
483static int zs_stats_size_show(struct seq_file *s, void *v)
484{
485 int i;
486 struct zs_pool *pool = s->private;
487 struct size_class *class;
488 int objs_per_zspage;
489 unsigned long class_almost_full, class_almost_empty;
490 unsigned long obj_allocated, obj_used, pages_used;
491 unsigned long total_class_almost_full = 0, total_class_almost_empty = 0;
492 unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
493
494 seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s\n",
495 "class", "size", "almost_full", "almost_empty",
496 "obj_allocated", "obj_used", "pages_used",
497 "pages_per_zspage");
498
499 for (i = 0; i < zs_size_classes; i++) {
500 class = pool->size_class[i];
501
502 if (class->index != i)
503 continue;
504
505 spin_lock(&class->lock);
506 class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
507 class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
508 obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
509 obj_used = zs_stat_get(class, OBJ_USED);
510 spin_unlock(&class->lock);
511
512 objs_per_zspage = get_maxobj_per_zspage(class->size,
513 class->pages_per_zspage);
514 pages_used = obj_allocated / objs_per_zspage *
515 class->pages_per_zspage;
516
517 seq_printf(s, " %5u %5u %11lu %12lu %13lu %10lu %10lu %16d\n",
518 i, class->size, class_almost_full, class_almost_empty,
519 obj_allocated, obj_used, pages_used,
520 class->pages_per_zspage);
521
522 total_class_almost_full += class_almost_full;
523 total_class_almost_empty += class_almost_empty;
524 total_objs += obj_allocated;
525 total_used_objs += obj_used;
526 total_pages += pages_used;
527 }
528
529 seq_puts(s, "\n");
530 seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu\n",
531 "Total", "", total_class_almost_full,
532 total_class_almost_empty, total_objs,
533 total_used_objs, total_pages);
534
535 return 0;
536}
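In the table printed above, obj_allocated counts object slots (it grows by a whole zspage's worth of objects whenever a zspage is created, see zs_malloc() below), obj_used counts live objects, and pages_used is derived from obj_allocated. A small sketch of that arithmetic with made-up numbers:

#include <stdio.h>

int main(void)
{
	/* hypothetical class: 1024-byte objects, one page per zspage */
	int class_size = 1024, pages_per_zspage = 1, page_size = 4096;
	int objs_per_zspage = pages_per_zspage * page_size / class_size;	/* 4 */

	/* three zspages exist, but only 10 of their 12 slots hold objects */
	unsigned long obj_allocated = 3 * objs_per_zspage, obj_used = 10;
	unsigned long pages_used = obj_allocated / objs_per_zspage * pages_per_zspage;

	printf("allocated=%lu used=%lu pages_used=%lu\n",
	       obj_allocated, obj_used, pages_used);	/* 12, 10, 3 */
	return 0;
}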
537
538static int zs_stats_size_open(struct inode *inode, struct file *file)
539{
540 return single_open(file, zs_stats_size_show, inode->i_private);
541}
542
543static const struct file_operations zs_stat_size_ops = {
544 .open = zs_stats_size_open,
545 .read = seq_read,
546 .llseek = seq_lseek,
547 .release = single_release,
548};
549
550static int zs_pool_stat_create(char *name, struct zs_pool *pool)
551{
552 struct dentry *entry;
553
554 if (!zs_stat_root)
555 return -ENODEV;
556
557 entry = debugfs_create_dir(name, zs_stat_root);
558 if (!entry) {
559 pr_warn("debugfs dir <%s> creation failed\n", name);
560 return -ENOMEM;
561 }
562 pool->stat_dentry = entry;
563
564 entry = debugfs_create_file("classes", S_IFREG | S_IRUGO,
565 pool->stat_dentry, pool, &zs_stat_size_ops);
566 if (!entry) {
567 pr_warn("%s: debugfs file entry <%s> creation failed\n",
568 name, "classes");
569 return -ENOMEM;
570 }
571
572 return 0;
573}
574
575static void zs_pool_stat_destroy(struct zs_pool *pool)
576{
577 debugfs_remove_recursive(pool->stat_dentry);
578}
579
580#else /* CONFIG_ZSMALLOC_STAT */
581
582static inline void zs_stat_inc(struct size_class *class,
583 enum zs_stat_type type, unsigned long cnt)
584{
585}
586
587static inline void zs_stat_dec(struct size_class *class,
588 enum zs_stat_type type, unsigned long cnt)
589{
590}
591
592static inline unsigned long zs_stat_get(struct size_class *class,
593 enum zs_stat_type type)
594{
595 return 0;
596}
597
598static int __init zs_stat_init(void)
599{
600 return 0;
601}
602
603static void __exit zs_stat_exit(void)
604{
605}
606
607static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
608{
609 return 0;
610}
611
612static inline void zs_pool_stat_destroy(struct zs_pool *pool)
613{
400} 614}
401 615
616#endif
617
618
402/* 619/*
403 * For each size class, zspages are divided into different groups 620 * For each size class, zspages are divided into different groups
404 * depending on how "full" they are. This was done so that we could 621 * depending on how "full" they are. This was done so that we could
@@ -419,7 +636,7 @@ static enum fullness_group get_fullness_group(struct page *page)
419 fg = ZS_EMPTY; 636 fg = ZS_EMPTY;
420 else if (inuse == max_objects) 637 else if (inuse == max_objects)
421 fg = ZS_FULL; 638 fg = ZS_FULL;
422 else if (inuse <= max_objects / fullness_threshold_frac) 639 else if (inuse <= 3 * max_objects / fullness_threshold_frac)
423 fg = ZS_ALMOST_EMPTY; 640 fg = ZS_ALMOST_EMPTY;
424 else 641 else
425 fg = ZS_ALMOST_FULL; 642 fg = ZS_ALMOST_FULL;
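The threshold change above means a zspage up to three quarters full now counts as ZS_ALMOST_EMPTY, where previously only up to one quarter full did, which gives the compaction code added later in this patch more candidate source pages to drain. A userspace sketch of the grouping rule, assuming fullness_threshold_frac is 4 (its value elsewhere in zsmalloc.c):

#include <stdio.h>

enum fullness_group_sketch { ALMOST_FULL, ALMOST_EMPTY, EMPTY, FULL };

static enum fullness_group_sketch group(int inuse, int max_objects)
{
	const int fullness_threshold_frac = 4;

	if (inuse == 0)
		return EMPTY;
	if (inuse == max_objects)
		return FULL;
	if (inuse <= 3 * max_objects / fullness_threshold_frac)
		return ALMOST_EMPTY;	/* at most 3/4 full */
	return ALMOST_FULL;
}

int main(void)
{
	/* with 8 objects per zspage, 6 in use is now still "almost empty" */
	printf("%d\n", group(6, 8) == ALMOST_EMPTY);	/* prints 1 */
	return 0;
}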
@@ -448,6 +665,8 @@ static void insert_zspage(struct page *page, struct size_class *class,
448 list_add_tail(&page->lru, &(*head)->lru); 665 list_add_tail(&page->lru, &(*head)->lru);
449 666
450 *head = page; 667 *head = page;
668 zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ?
669 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
451} 670}
452 671
453/* 672/*
@@ -473,6 +692,8 @@ static void remove_zspage(struct page *page, struct size_class *class,
473 struct page, lru); 692 struct page, lru);
474 693
475 list_del_init(&page->lru); 694 list_del_init(&page->lru);
695 zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?
696 CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
476} 697}
477 698
478/* 699/*
@@ -484,11 +705,10 @@ static void remove_zspage(struct page *page, struct size_class *class,
484 * page from the freelist of the old fullness group to that of the new 705 * page from the freelist of the old fullness group to that of the new
485 * fullness group. 706 * fullness group.
486 */ 707 */
487static enum fullness_group fix_fullness_group(struct zs_pool *pool, 708static enum fullness_group fix_fullness_group(struct size_class *class,
488 struct page *page) 709 struct page *page)
489{ 710{
490 int class_idx; 711 int class_idx;
491 struct size_class *class;
492 enum fullness_group currfg, newfg; 712 enum fullness_group currfg, newfg;
493 713
494 BUG_ON(!is_first_page(page)); 714 BUG_ON(!is_first_page(page));
@@ -498,7 +718,6 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool,
498 if (newfg == currfg) 718 if (newfg == currfg)
499 goto out; 719 goto out;
500 720
501 class = pool->size_class[class_idx];
502 remove_zspage(page, class, currfg); 721 remove_zspage(page, class, currfg);
503 insert_zspage(page, class, newfg); 722 insert_zspage(page, class, newfg);
504 set_zspage_mapping(page, class_idx, newfg); 723 set_zspage_mapping(page, class_idx, newfg);
@@ -512,7 +731,8 @@ out:
512 * to form a zspage for each size class. This is important 731 * to form a zspage for each size class. This is important
513 * to reduce wastage due to unusable space left at end of 732 * to reduce wastage due to unusable space left at end of
514 * each zspage which is given as: 733 * each zspage which is given as:
515 * wastage = Zp - Zp % size_class 734 * wastage = Zp % class_size
735 * usage = Zp - wastage
516 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ... 736 * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ...
517 * 737 *
518 * For example, for size class of 3/8 * PAGE_SIZE, we should 738 * For example, for size class of 3/8 * PAGE_SIZE, we should
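A runnable sketch of the rule this comment describes, picking the zspage size (in pages) that leaves the smallest unusable tail for a given class, along the lines of the get_pages_per_zspage() helper elsewhere in this file; PAGE_SIZE of 4096 and the ZS_MAX_PAGES_PER_ZSPAGE of 4 defined above are assumed:

#include <stdio.h>

#define PAGE_SIZE		4096
#define ZS_MAX_PAGES_PER_ZSPAGE	4

static int pages_per_zspage_sketch(int class_size)
{
	int k, best_k = 1, max_usedpc = 0;

	for (k = 1; k <= ZS_MAX_PAGES_PER_ZSPAGE; k++) {
		int zspage_size = k * PAGE_SIZE;
		int waste = zspage_size % class_size;	/* wastage = Zp % class_size */
		int usedpc = (zspage_size - waste) * 100 / zspage_size;

		if (usedpc > max_usedpc) {
			max_usedpc = usedpc;
			best_k = k;
		}
	}
	return best_k;
}

int main(void)
{
	/* for the 3/8 * PAGE_SIZE class used as the example above, prints 3 */
	printf("%d\n", pages_per_zspage_sketch(3 * PAGE_SIZE / 8));
	return 0;
}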
@@ -571,35 +791,50 @@ static struct page *get_next_page(struct page *page)
571 791
572/* 792/*
573 * Encode <page, obj_idx> as a single handle value. 793 * Encode <page, obj_idx> as a single handle value.
574 * On hardware platforms with physical memory starting at 0x0 the pfn 794 * We use the least bit of handle for tagging.
575 * could be 0 so we ensure that the handle will never be 0 by adjusting the
576 * encoded obj_idx value before encoding.
577 */ 795 */
578static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) 796static void *location_to_obj(struct page *page, unsigned long obj_idx)
579{ 797{
580 unsigned long handle; 798 unsigned long obj;
581 799
582 if (!page) { 800 if (!page) {
583 BUG_ON(obj_idx); 801 BUG_ON(obj_idx);
584 return NULL; 802 return NULL;
585 } 803 }
586 804
587 handle = page_to_pfn(page) << OBJ_INDEX_BITS; 805 obj = page_to_pfn(page) << OBJ_INDEX_BITS;
588 handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); 806 obj |= ((obj_idx) & OBJ_INDEX_MASK);
807 obj <<= OBJ_TAG_BITS;
589 808
590 return (void *)handle; 809 return (void *)obj;
591} 810}
592 811
593/* 812/*
594 * Decode <page, obj_idx> pair from the given object handle. We adjust the 813 * Decode <page, obj_idx> pair from the given object handle. We adjust the
595 * decoded obj_idx back to its original value since it was adjusted in 814 * decoded obj_idx back to its original value since it was adjusted in
596 * obj_location_to_handle(). 815 * location_to_obj().
597 */ 816 */
598static void obj_handle_to_location(unsigned long handle, struct page **page, 817static void obj_to_location(unsigned long obj, struct page **page,
599 unsigned long *obj_idx) 818 unsigned long *obj_idx)
600{ 819{
601 *page = pfn_to_page(handle >> OBJ_INDEX_BITS); 820 obj >>= OBJ_TAG_BITS;
602 *obj_idx = (handle & OBJ_INDEX_MASK) - 1; 821 *page = pfn_to_page(obj >> OBJ_INDEX_BITS);
822 *obj_idx = (obj & OBJ_INDEX_MASK);
823}
824
825static unsigned long handle_to_obj(unsigned long handle)
826{
827 return *(unsigned long *)handle;
828}
829
830static unsigned long obj_to_head(struct size_class *class, struct page *page,
831 void *obj)
832{
833 if (class->huge) {
834 VM_BUG_ON(!is_first_page(page));
835 return *(unsigned long *)page_private(page);
836 } else
837 return *(unsigned long *)obj;
603} 838}
604 839
605static unsigned long obj_idx_to_offset(struct page *page, 840static unsigned long obj_idx_to_offset(struct page *page,
@@ -613,6 +848,25 @@ static unsigned long obj_idx_to_offset(struct page *page,
613 return off + obj_idx * class_size; 848 return off + obj_idx * class_size;
614} 849}
615 850
851static inline int trypin_tag(unsigned long handle)
852{
853 unsigned long *ptr = (unsigned long *)handle;
854
855 return !test_and_set_bit_lock(HANDLE_PIN_BIT, ptr);
856}
857
858static void pin_tag(unsigned long handle)
859{
860 while (!trypin_tag(handle));
861}
862
863static void unpin_tag(unsigned long handle)
864{
865 unsigned long *ptr = (unsigned long *)handle;
866
867 clear_bit_unlock(HANDLE_PIN_BIT, ptr);
868}
869
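pin_tag()/unpin_tag() turn bit 0 of the word stored behind the handle into a tiny per-object lock: zs_map_object() and zs_free() pin the handle before touching the object, and the migration path only moves objects it manages to trypin. A userspace sketch of the same bit-lock idea, using GCC/Clang atomic builtins in place of test_and_set_bit_lock()/clear_bit_unlock():

#include <stdio.h>

#define HANDLE_PIN_BIT	0

static int trypin(unsigned long *word)
{
	return !(__atomic_fetch_or(word, 1UL << HANDLE_PIN_BIT,
				   __ATOMIC_ACQUIRE) & (1UL << HANDLE_PIN_BIT));
}

static void unpin(unsigned long *word)
{
	__atomic_fetch_and(word, ~(1UL << HANDLE_PIN_BIT), __ATOMIC_RELEASE);
}

int main(void)
{
	unsigned long handle_word = 0;	/* bit 0 is free because obj is shifted left */

	printf("%d\n", trypin(&handle_word));	/* 1: pinned it */
	printf("%d\n", trypin(&handle_word));	/* 0: already pinned, migration would skip it */
	unpin(&handle_word);
	printf("%d\n", trypin(&handle_word));	/* 1: free again */
	return 0;
}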
616static void reset_page(struct page *page) 870static void reset_page(struct page *page)
617{ 871{
618 clear_bit(PG_private, &page->flags); 872 clear_bit(PG_private, &page->flags);
@@ -674,7 +928,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
674 link = (struct link_free *)vaddr + off / sizeof(*link); 928 link = (struct link_free *)vaddr + off / sizeof(*link);
675 929
676 while ((off += class->size) < PAGE_SIZE) { 930 while ((off += class->size) < PAGE_SIZE) {
677 link->next = obj_location_to_handle(page, i++); 931 link->next = location_to_obj(page, i++);
678 link += class->size / sizeof(*link); 932 link += class->size / sizeof(*link);
679 } 933 }
680 934
@@ -684,7 +938,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
684 * page (if present) 938 * page (if present)
685 */ 939 */
686 next_page = get_next_page(page); 940 next_page = get_next_page(page);
687 link->next = obj_location_to_handle(next_page, 0); 941 link->next = location_to_obj(next_page, 0);
688 kunmap_atomic(vaddr); 942 kunmap_atomic(vaddr);
689 page = next_page; 943 page = next_page;
690 off %= PAGE_SIZE; 944 off %= PAGE_SIZE;
@@ -738,7 +992,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
738 992
739 init_zspage(first_page, class); 993 init_zspage(first_page, class);
740 994
741 first_page->freelist = obj_location_to_handle(first_page, 0); 995 first_page->freelist = location_to_obj(first_page, 0);
742 /* Maximum number of objects we can store in this zspage */ 996 /* Maximum number of objects we can store in this zspage */
743 first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; 997 first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
744 998
@@ -860,12 +1114,19 @@ static void __zs_unmap_object(struct mapping_area *area,
860{ 1114{
861 int sizes[2]; 1115 int sizes[2];
862 void *addr; 1116 void *addr;
863 char *buf = area->vm_buf; 1117 char *buf;
864 1118
865 /* no write fastpath */ 1119 /* no write fastpath */
866 if (area->vm_mm == ZS_MM_RO) 1120 if (area->vm_mm == ZS_MM_RO)
867 goto out; 1121 goto out;
868 1122
1123 buf = area->vm_buf;
1124 if (!area->huge) {
1125 buf = buf + ZS_HANDLE_SIZE;
1126 size -= ZS_HANDLE_SIZE;
1127 off += ZS_HANDLE_SIZE;
1128 }
1129
869 sizes[0] = PAGE_SIZE - off; 1130 sizes[0] = PAGE_SIZE - off;
870 sizes[1] = size - sizes[0]; 1131 sizes[1] = size - sizes[0];
871 1132
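The ZS_HANDLE_SIZE adjustments here exist because, for non-huge classes, every chunk now begins with one word that stores the handle (tagged with OBJ_ALLOCATED_TAG while allocated, or holding the freelist link while free), followed by the caller's data; zs_map_object() below returns the address just past that word. A rough sketch of the layout, assuming 64-bit longs:

/* sketch only; the real code works on raw offsets inside the zspage, not a struct */
struct zs_chunk_sketch {
	unsigned long head;	/* handle | OBJ_ALLOCATED_TAG when allocated,
				 * link_free.next when free */
	char payload[];		/* what zs_map_object() hands back to the caller */
};

Huge classes (a single object filling a single-page zspage) skip the in-chunk header and keep the handle in first_page->private instead; see obj_malloc() and obj_to_head().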
@@ -952,11 +1213,6 @@ static void init_zs_size_classes(void)
952 zs_size_classes = nr; 1213 zs_size_classes = nr;
953} 1214}
954 1215
955static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
956{
957 return pages_per_zspage * PAGE_SIZE / size;
958}
959
960static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) 1216static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
961{ 1217{
962 if (prev->pages_per_zspage != pages_per_zspage) 1218 if (prev->pages_per_zspage != pages_per_zspage)
@@ -969,166 +1225,13 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
969 return true; 1225 return true;
970} 1226}
971 1227
972#ifdef CONFIG_ZSMALLOC_STAT 1228static bool zspage_full(struct page *page)
973
974static inline void zs_stat_inc(struct size_class *class,
975 enum zs_stat_type type, unsigned long cnt)
976{
977 class->stats.objs[type] += cnt;
978}
979
980static inline void zs_stat_dec(struct size_class *class,
981 enum zs_stat_type type, unsigned long cnt)
982{
983 class->stats.objs[type] -= cnt;
984}
985
986static inline unsigned long zs_stat_get(struct size_class *class,
987 enum zs_stat_type type)
988{
989 return class->stats.objs[type];
990}
991
992static int __init zs_stat_init(void)
993{
994 if (!debugfs_initialized())
995 return -ENODEV;
996
997 zs_stat_root = debugfs_create_dir("zsmalloc", NULL);
998 if (!zs_stat_root)
999 return -ENOMEM;
1000
1001 return 0;
1002}
1003
1004static void __exit zs_stat_exit(void)
1005{
1006 debugfs_remove_recursive(zs_stat_root);
1007}
1008
1009static int zs_stats_size_show(struct seq_file *s, void *v)
1010{ 1229{
1011 int i; 1230 BUG_ON(!is_first_page(page));
1012 struct zs_pool *pool = s->private;
1013 struct size_class *class;
1014 int objs_per_zspage;
1015 unsigned long obj_allocated, obj_used, pages_used;
1016 unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0;
1017
1018 seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size",
1019 "obj_allocated", "obj_used", "pages_used");
1020
1021 for (i = 0; i < zs_size_classes; i++) {
1022 class = pool->size_class[i];
1023
1024 if (class->index != i)
1025 continue;
1026
1027 spin_lock(&class->lock);
1028 obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
1029 obj_used = zs_stat_get(class, OBJ_USED);
1030 spin_unlock(&class->lock);
1031
1032 objs_per_zspage = get_maxobj_per_zspage(class->size,
1033 class->pages_per_zspage);
1034 pages_used = obj_allocated / objs_per_zspage *
1035 class->pages_per_zspage;
1036
1037 seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i,
1038 class->size, obj_allocated, obj_used, pages_used);
1039
1040 total_objs += obj_allocated;
1041 total_used_objs += obj_used;
1042 total_pages += pages_used;
1043 }
1044
1045 seq_puts(s, "\n");
1046 seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "",
1047 total_objs, total_used_objs, total_pages);
1048
1049 return 0;
1050}
1051
1052static int zs_stats_size_open(struct inode *inode, struct file *file)
1053{
1054 return single_open(file, zs_stats_size_show, inode->i_private);
1055}
1056
1057static const struct file_operations zs_stat_size_ops = {
1058 .open = zs_stats_size_open,
1059 .read = seq_read,
1060 .llseek = seq_lseek,
1061 .release = single_release,
1062};
1063
1064static int zs_pool_stat_create(char *name, struct zs_pool *pool)
1065{
1066 struct dentry *entry;
1067
1068 if (!zs_stat_root)
1069 return -ENODEV;
1070
1071 entry = debugfs_create_dir(name, zs_stat_root);
1072 if (!entry) {
1073 pr_warn("debugfs dir <%s> creation failed\n", name);
1074 return -ENOMEM;
1075 }
1076 pool->stat_dentry = entry;
1077
1078 entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO,
1079 pool->stat_dentry, pool, &zs_stat_size_ops);
1080 if (!entry) {
1081 pr_warn("%s: debugfs file entry <%s> creation failed\n",
1082 name, "obj_in_classes");
1083 return -ENOMEM;
1084 }
1085
1086 return 0;
1087}
1088
1089static void zs_pool_stat_destroy(struct zs_pool *pool)
1090{
1091 debugfs_remove_recursive(pool->stat_dentry);
1092}
1093
1094#else /* CONFIG_ZSMALLOC_STAT */
1095
1096static inline void zs_stat_inc(struct size_class *class,
1097 enum zs_stat_type type, unsigned long cnt)
1098{
1099}
1100
1101static inline void zs_stat_dec(struct size_class *class,
1102 enum zs_stat_type type, unsigned long cnt)
1103{
1104}
1105
1106static inline unsigned long zs_stat_get(struct size_class *class,
1107 enum zs_stat_type type)
1108{
1109 return 0;
1110}
1111
1112static int __init zs_stat_init(void)
1113{
1114 return 0;
1115}
1116
1117static void __exit zs_stat_exit(void)
1118{
1119}
1120
1121static inline int zs_pool_stat_create(char *name, struct zs_pool *pool)
1122{
1123 return 0;
1124}
1125 1231
1126static inline void zs_pool_stat_destroy(struct zs_pool *pool) 1232 return page->inuse == page->objects;
1127{
1128} 1233}
1129 1234
1130#endif
1131
1132unsigned long zs_get_total_pages(struct zs_pool *pool) 1235unsigned long zs_get_total_pages(struct zs_pool *pool)
1133{ 1236{
1134 return atomic_long_read(&pool->pages_allocated); 1237 return atomic_long_read(&pool->pages_allocated);
@@ -1153,13 +1256,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1153 enum zs_mapmode mm) 1256 enum zs_mapmode mm)
1154{ 1257{
1155 struct page *page; 1258 struct page *page;
1156 unsigned long obj_idx, off; 1259 unsigned long obj, obj_idx, off;
1157 1260
1158 unsigned int class_idx; 1261 unsigned int class_idx;
1159 enum fullness_group fg; 1262 enum fullness_group fg;
1160 struct size_class *class; 1263 struct size_class *class;
1161 struct mapping_area *area; 1264 struct mapping_area *area;
1162 struct page *pages[2]; 1265 struct page *pages[2];
1266 void *ret;
1163 1267
1164 BUG_ON(!handle); 1268 BUG_ON(!handle);
1165 1269
@@ -1170,7 +1274,11 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1170 */ 1274 */
1171 BUG_ON(in_interrupt()); 1275 BUG_ON(in_interrupt());
1172 1276
1173 obj_handle_to_location(handle, &page, &obj_idx); 1277 /* From now on, migration cannot move the object */
1278 pin_tag(handle);
1279
1280 obj = handle_to_obj(handle);
1281 obj_to_location(obj, &page, &obj_idx);
1174 get_zspage_mapping(get_first_page(page), &class_idx, &fg); 1282 get_zspage_mapping(get_first_page(page), &class_idx, &fg);
1175 class = pool->size_class[class_idx]; 1283 class = pool->size_class[class_idx];
1176 off = obj_idx_to_offset(page, obj_idx, class->size); 1284 off = obj_idx_to_offset(page, obj_idx, class->size);
@@ -1180,7 +1288,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1180 if (off + class->size <= PAGE_SIZE) { 1288 if (off + class->size <= PAGE_SIZE) {
1181 /* this object is contained entirely within a page */ 1289 /* this object is contained entirely within a page */
1182 area->vm_addr = kmap_atomic(page); 1290 area->vm_addr = kmap_atomic(page);
1183 return area->vm_addr + off; 1291 ret = area->vm_addr + off;
1292 goto out;
1184 } 1293 }
1185 1294
1186 /* this object spans two pages */ 1295 /* this object spans two pages */
@@ -1188,14 +1297,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
1188 pages[1] = get_next_page(page); 1297 pages[1] = get_next_page(page);
1189 BUG_ON(!pages[1]); 1298 BUG_ON(!pages[1]);
1190 1299
1191 return __zs_map_object(area, pages, off, class->size); 1300 ret = __zs_map_object(area, pages, off, class->size);
1301out:
1302 if (!class->huge)
1303 ret += ZS_HANDLE_SIZE;
1304
1305 return ret;
1192} 1306}
1193EXPORT_SYMBOL_GPL(zs_map_object); 1307EXPORT_SYMBOL_GPL(zs_map_object);
1194 1308
1195void zs_unmap_object(struct zs_pool *pool, unsigned long handle) 1309void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1196{ 1310{
1197 struct page *page; 1311 struct page *page;
1198 unsigned long obj_idx, off; 1312 unsigned long obj, obj_idx, off;
1199 1313
1200 unsigned int class_idx; 1314 unsigned int class_idx;
1201 enum fullness_group fg; 1315 enum fullness_group fg;
@@ -1204,7 +1318,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1204 1318
1205 BUG_ON(!handle); 1319 BUG_ON(!handle);
1206 1320
1207 obj_handle_to_location(handle, &page, &obj_idx); 1321 obj = handle_to_obj(handle);
1322 obj_to_location(obj, &page, &obj_idx);
1208 get_zspage_mapping(get_first_page(page), &class_idx, &fg); 1323 get_zspage_mapping(get_first_page(page), &class_idx, &fg);
1209 class = pool->size_class[class_idx]; 1324 class = pool->size_class[class_idx];
1210 off = obj_idx_to_offset(page, obj_idx, class->size); 1325 off = obj_idx_to_offset(page, obj_idx, class->size);
@@ -1222,9 +1337,42 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
1222 __zs_unmap_object(area, pages, off, class->size); 1337 __zs_unmap_object(area, pages, off, class->size);
1223 } 1338 }
1224 put_cpu_var(zs_map_area); 1339 put_cpu_var(zs_map_area);
1340 unpin_tag(handle);
1225} 1341}
1226EXPORT_SYMBOL_GPL(zs_unmap_object); 1342EXPORT_SYMBOL_GPL(zs_unmap_object);
1227 1343
1344static unsigned long obj_malloc(struct page *first_page,
1345 struct size_class *class, unsigned long handle)
1346{
1347 unsigned long obj;
1348 struct link_free *link;
1349
1350 struct page *m_page;
1351 unsigned long m_objidx, m_offset;
1352 void *vaddr;
1353
1354 handle |= OBJ_ALLOCATED_TAG;
1355 obj = (unsigned long)first_page->freelist;
1356 obj_to_location(obj, &m_page, &m_objidx);
1357 m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
1358
1359 vaddr = kmap_atomic(m_page);
1360 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1361 first_page->freelist = link->next;
1362 if (!class->huge)
1363 /* record handle in the header of allocated chunk */
1364 link->handle = handle;
1365 else
1366 /* record handle in first_page->private */
1367 set_page_private(first_page, handle);
1368 kunmap_atomic(vaddr);
1369 first_page->inuse++;
1370 zs_stat_inc(class, OBJ_USED, 1);
1371
1372 return obj;
1373}
1374
1375
1228/** 1376/**
1229 * zs_malloc - Allocate block of given size from pool. 1377 * zs_malloc - Allocate block of given size from pool.
1230 * @pool: pool to allocate from 1378 * @pool: pool to allocate from
@@ -1236,17 +1384,19 @@ EXPORT_SYMBOL_GPL(zs_unmap_object);
1236 */ 1384 */
1237unsigned long zs_malloc(struct zs_pool *pool, size_t size) 1385unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1238{ 1386{
1239 unsigned long obj; 1387 unsigned long handle, obj;
1240 struct link_free *link;
1241 struct size_class *class; 1388 struct size_class *class;
1242 void *vaddr; 1389 struct page *first_page;
1243
1244 struct page *first_page, *m_page;
1245 unsigned long m_objidx, m_offset;
1246 1390
1247 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) 1391 if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
1248 return 0; 1392 return 0;
1249 1393
1394 handle = alloc_handle(pool);
1395 if (!handle)
1396 return 0;
1397
1398 /* extra space in chunk to keep the handle */
1399 size += ZS_HANDLE_SIZE;
1250 class = pool->size_class[get_size_class_index(size)]; 1400 class = pool->size_class[get_size_class_index(size)];
1251 1401
1252 spin_lock(&class->lock); 1402 spin_lock(&class->lock);
@@ -1255,8 +1405,10 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1255 if (!first_page) { 1405 if (!first_page) {
1256 spin_unlock(&class->lock); 1406 spin_unlock(&class->lock);
1257 first_page = alloc_zspage(class, pool->flags); 1407 first_page = alloc_zspage(class, pool->flags);
1258 if (unlikely(!first_page)) 1408 if (unlikely(!first_page)) {
1409 free_handle(pool, handle);
1259 return 0; 1410 return 0;
1411 }
1260 1412
1261 set_zspage_mapping(first_page, class->index, ZS_EMPTY); 1413 set_zspage_mapping(first_page, class->index, ZS_EMPTY);
1262 atomic_long_add(class->pages_per_zspage, 1414 atomic_long_add(class->pages_per_zspage,
@@ -1267,73 +1419,360 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
1267 class->size, class->pages_per_zspage)); 1419 class->size, class->pages_per_zspage));
1268 } 1420 }
1269 1421
1270 obj = (unsigned long)first_page->freelist; 1422 obj = obj_malloc(first_page, class, handle);
1271 obj_handle_to_location(obj, &m_page, &m_objidx);
1272 m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
1273
1274 vaddr = kmap_atomic(m_page);
1275 link = (struct link_free *)vaddr + m_offset / sizeof(*link);
1276 first_page->freelist = link->next;
1277 memset(link, POISON_INUSE, sizeof(*link));
1278 kunmap_atomic(vaddr);
1279
1280 first_page->inuse++;
1281 zs_stat_inc(class, OBJ_USED, 1);
1282 /* Now move the zspage to another fullness group, if required */ 1423 /* Now move the zspage to another fullness group, if required */
1283 fix_fullness_group(pool, first_page); 1424 fix_fullness_group(class, first_page);
1425 record_obj(handle, obj);
1284 spin_unlock(&class->lock); 1426 spin_unlock(&class->lock);
1285 1427
1286 return obj; 1428 return handle;
1287} 1429}
1288EXPORT_SYMBOL_GPL(zs_malloc); 1430EXPORT_SYMBOL_GPL(zs_malloc);
1289 1431
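With the handle indirection in place, zs_malloc() returns an opaque handle rather than an encoded location, and callers bracket every access with map/unmap so the pin bit can hold off migration. A hedged sketch of the calling convention (illustrative only, not lifted from zram, the in-tree user; pool, src and len are the caller's):

	unsigned long handle;
	void *dst;

	handle = zs_malloc(pool, len);		/* len must be <= ZS_MAX_ALLOC_SIZE */
	if (!handle)
		return -ENOMEM;

	dst = zs_map_object(pool, handle, ZS_MM_WO);
	memcpy(dst, src, len);			/* store the (compressed) data */
	zs_unmap_object(pool, handle);

	/* ... later, when the object is no longer needed ... */
	zs_free(pool, handle);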
1290void zs_free(struct zs_pool *pool, unsigned long obj) 1432static void obj_free(struct zs_pool *pool, struct size_class *class,
1433 unsigned long obj)
1291{ 1434{
1292 struct link_free *link; 1435 struct link_free *link;
1293 struct page *first_page, *f_page; 1436 struct page *first_page, *f_page;
1294 unsigned long f_objidx, f_offset; 1437 unsigned long f_objidx, f_offset;
1295 void *vaddr; 1438 void *vaddr;
1296
1297 int class_idx; 1439 int class_idx;
1298 struct size_class *class;
1299 enum fullness_group fullness; 1440 enum fullness_group fullness;
1300 1441
1301 if (unlikely(!obj)) 1442 BUG_ON(!obj);
1302 return;
1303 1443
1304 obj_handle_to_location(obj, &f_page, &f_objidx); 1444 obj &= ~OBJ_ALLOCATED_TAG;
1445 obj_to_location(obj, &f_page, &f_objidx);
1305 first_page = get_first_page(f_page); 1446 first_page = get_first_page(f_page);
1306 1447
1307 get_zspage_mapping(first_page, &class_idx, &fullness); 1448 get_zspage_mapping(first_page, &class_idx, &fullness);
1308 class = pool->size_class[class_idx];
1309 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); 1449 f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
1310 1450
1311 spin_lock(&class->lock); 1451 vaddr = kmap_atomic(f_page);
1312 1452
1313 /* Insert this object in containing zspage's freelist */ 1453 /* Insert this object in containing zspage's freelist */
1314 vaddr = kmap_atomic(f_page);
1315 link = (struct link_free *)(vaddr + f_offset); 1454 link = (struct link_free *)(vaddr + f_offset);
1316 link->next = first_page->freelist; 1455 link->next = first_page->freelist;
1456 if (class->huge)
1457 set_page_private(first_page, 0);
1317 kunmap_atomic(vaddr); 1458 kunmap_atomic(vaddr);
1318 first_page->freelist = (void *)obj; 1459 first_page->freelist = (void *)obj;
1319
1320 first_page->inuse--; 1460 first_page->inuse--;
1321 fullness = fix_fullness_group(pool, first_page);
1322
1323 zs_stat_dec(class, OBJ_USED, 1); 1461 zs_stat_dec(class, OBJ_USED, 1);
1324 if (fullness == ZS_EMPTY) 1462}
1463
1464void zs_free(struct zs_pool *pool, unsigned long handle)
1465{
1466 struct page *first_page, *f_page;
1467 unsigned long obj, f_objidx;
1468 int class_idx;
1469 struct size_class *class;
1470 enum fullness_group fullness;
1471
1472 if (unlikely(!handle))
1473 return;
1474
1475 pin_tag(handle);
1476 obj = handle_to_obj(handle);
1477 obj_to_location(obj, &f_page, &f_objidx);
1478 first_page = get_first_page(f_page);
1479
1480 get_zspage_mapping(first_page, &class_idx, &fullness);
1481 class = pool->size_class[class_idx];
1482
1483 spin_lock(&class->lock);
1484 obj_free(pool, class, obj);
1485 fullness = fix_fullness_group(class, first_page);
1486 if (fullness == ZS_EMPTY) {
1325 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( 1487 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1326 class->size, class->pages_per_zspage)); 1488 class->size, class->pages_per_zspage));
1327 1489 atomic_long_sub(class->pages_per_zspage,
1490 &pool->pages_allocated);
1491 free_zspage(first_page);
1492 }
1328 spin_unlock(&class->lock); 1493 spin_unlock(&class->lock);
1494 unpin_tag(handle);
1495
1496 free_handle(pool, handle);
1497}
1498EXPORT_SYMBOL_GPL(zs_free);
1499
1500static void zs_object_copy(unsigned long src, unsigned long dst,
1501 struct size_class *class)
1502{
1503 struct page *s_page, *d_page;
1504 unsigned long s_objidx, d_objidx;
1505 unsigned long s_off, d_off;
1506 void *s_addr, *d_addr;
1507 int s_size, d_size, size;
1508 int written = 0;
1509
1510 s_size = d_size = class->size;
1511
1512 obj_to_location(src, &s_page, &s_objidx);
1513 obj_to_location(dst, &d_page, &d_objidx);
1514
1515 s_off = obj_idx_to_offset(s_page, s_objidx, class->size);
1516 d_off = obj_idx_to_offset(d_page, d_objidx, class->size);
1517
1518 if (s_off + class->size > PAGE_SIZE)
1519 s_size = PAGE_SIZE - s_off;
1520
1521 if (d_off + class->size > PAGE_SIZE)
1522 d_size = PAGE_SIZE - d_off;
1523
1524 s_addr = kmap_atomic(s_page);
1525 d_addr = kmap_atomic(d_page);
1526
1527 while (1) {
1528 size = min(s_size, d_size);
1529 memcpy(d_addr + d_off, s_addr + s_off, size);
1530 written += size;
1531
1532 if (written == class->size)
1533 break;
1534
1535 s_off += size;
1536 s_size -= size;
1537 d_off += size;
1538 d_size -= size;
1539
1540 if (s_off >= PAGE_SIZE) {
1541 kunmap_atomic(d_addr);
1542 kunmap_atomic(s_addr);
1543 s_page = get_next_page(s_page);
1544 BUG_ON(!s_page);
1545 s_addr = kmap_atomic(s_page);
1546 d_addr = kmap_atomic(d_page);
1547 s_size = class->size - written;
1548 s_off = 0;
1549 }
1550
1551 if (d_off >= PAGE_SIZE) {
1552 kunmap_atomic(d_addr);
1553 d_page = get_next_page(d_page);
1554 BUG_ON(!d_page);
1555 d_addr = kmap_atomic(d_page);
1556 d_size = class->size - written;
1557 d_off = 0;
1558 }
1559 }
1560
1561 kunmap_atomic(d_addr);
1562 kunmap_atomic(s_addr);
1563}
1564
1565/*
1566 * Find an allocated object in the zspage, starting from the given
1567 * index, and return its handle.
1568 */
1569static unsigned long find_alloced_obj(struct page *page, int index,
1570 struct size_class *class)
1571{
1572 unsigned long head;
1573 int offset = 0;
1574 unsigned long handle = 0;
1575 void *addr = kmap_atomic(page);
1576
1577 if (!is_first_page(page))
1578 offset = page->index;
1579 offset += class->size * index;
1580
1581 while (offset < PAGE_SIZE) {
1582 head = obj_to_head(class, page, addr + offset);
1583 if (head & OBJ_ALLOCATED_TAG) {
1584 handle = head & ~OBJ_ALLOCATED_TAG;
1585 if (trypin_tag(handle))
1586 break;
1587 handle = 0;
1588 }
1589
1590 offset += class->size;
1591 index++;
1592 }
1593
1594 kunmap_atomic(addr);
1595 return handle;
1596}
1597
1598struct zs_compact_control {
1599 /* Source page for migration; may be a subpage of a zspage. */
1600 struct page *s_page;
1601 /* Destination page for migration; must be the first page
1602 * of a zspage. */
1603 struct page *d_page;
1604 /* Starting object index within @s_page from which to search
1605 * for live objects. */
1606 int index;
1607 /* how many objects have been migrated */
1608 int nr_migrated;
1609};
1610
1611static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
1612 struct zs_compact_control *cc)
1613{
1614 unsigned long used_obj, free_obj;
1615 unsigned long handle;
1616 struct page *s_page = cc->s_page;
1617 struct page *d_page = cc->d_page;
1618 unsigned long index = cc->index;
1619 int nr_migrated = 0;
1620 int ret = 0;
1621
1622 while (1) {
1623 handle = find_alloced_obj(s_page, index, class);
1624 if (!handle) {
1625 s_page = get_next_page(s_page);
1626 if (!s_page)
1627 break;
1628 index = 0;
1629 continue;
1630 }
1631
1632 /* Stop if there is no more space */
1633 if (zspage_full(d_page)) {
1634 unpin_tag(handle);
1635 ret = -ENOMEM;
1636 break;
1637 }
1638
1639 used_obj = handle_to_obj(handle);
1640 free_obj = obj_malloc(d_page, class, handle);
1641 zs_object_copy(used_obj, free_obj, class);
1642 index++;
1643 record_obj(handle, free_obj);
1644 unpin_tag(handle);
1645 obj_free(pool, class, used_obj);
1646 nr_migrated++;
1647 }
1648
1649 /* Remember last position in this iteration */
1650 cc->s_page = s_page;
1651 cc->index = index;
1652 cc->nr_migrated = nr_migrated;
1653
1654 return ret;
1655}
1656
1657static struct page *alloc_target_page(struct size_class *class)
1658{
1659 int i;
1660 struct page *page;
1661
1662 for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
1663 page = class->fullness_list[i];
1664 if (page) {
1665 remove_zspage(page, class, i);
1666 break;
1667 }
1668 }
1669
1670 return page;
1671}
1672
1673static void putback_zspage(struct zs_pool *pool, struct size_class *class,
1674 struct page *first_page)
1675{
1676 enum fullness_group fullness;
1677
1678 BUG_ON(!is_first_page(first_page));
1679
1680 fullness = get_fullness_group(first_page);
1681 insert_zspage(first_page, class, fullness);
1682 set_zspage_mapping(first_page, class->index, fullness);
1329 1683
1330 if (fullness == ZS_EMPTY) { 1684 if (fullness == ZS_EMPTY) {
1685 zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
1686 class->size, class->pages_per_zspage));
1331 atomic_long_sub(class->pages_per_zspage, 1687 atomic_long_sub(class->pages_per_zspage,
1332 &pool->pages_allocated); 1688 &pool->pages_allocated);
1689
1333 free_zspage(first_page); 1690 free_zspage(first_page);
1334 } 1691 }
1335} 1692}
1336EXPORT_SYMBOL_GPL(zs_free); 1693
1694static struct page *isolate_source_page(struct size_class *class)
1695{
1696 struct page *page;
1697
1698 page = class->fullness_list[ZS_ALMOST_EMPTY];
1699 if (page)
1700 remove_zspage(page, class, ZS_ALMOST_EMPTY);
1701
1702 return page;
1703}
1704
1705static unsigned long __zs_compact(struct zs_pool *pool,
1706 struct size_class *class)
1707{
1708 int nr_to_migrate;
1709 struct zs_compact_control cc;
1710 struct page *src_page;
1711 struct page *dst_page = NULL;
1712 unsigned long nr_total_migrated = 0;
1713
1714 spin_lock(&class->lock);
1715 while ((src_page = isolate_source_page(class))) {
1716
1717 BUG_ON(!is_first_page(src_page));
1718
1719 /* The goal is to migrate all live objects in source page */
1720 nr_to_migrate = src_page->inuse;
1721 cc.index = 0;
1722 cc.s_page = src_page;
1723
1724 while ((dst_page = alloc_target_page(class))) {
1725 cc.d_page = dst_page;
1726 /*
1727 * If there is no more space in dst_page, try to
1728 * allocate another zspage.
1729 */
1730 if (!migrate_zspage(pool, class, &cc))
1731 break;
1732
1733 putback_zspage(pool, class, dst_page);
1734 nr_total_migrated += cc.nr_migrated;
1735 nr_to_migrate -= cc.nr_migrated;
1736 }
1737
1738 /* Stop if we couldn't find slot */
1739 if (dst_page == NULL)
1740 break;
1741
1742 putback_zspage(pool, class, dst_page);
1743 putback_zspage(pool, class, src_page);
1744 spin_unlock(&class->lock);
1745 nr_total_migrated += cc.nr_migrated;
1746 cond_resched();
1747 spin_lock(&class->lock);
1748 }
1749
1750 if (src_page)
1751 putback_zspage(pool, class, src_page);
1752
1753 spin_unlock(&class->lock);
1754
1755 return nr_total_migrated;
1756}
1757
1758unsigned long zs_compact(struct zs_pool *pool)
1759{
1760 int i;
1761 unsigned long nr_migrated = 0;
1762 struct size_class *class;
1763
1764 for (i = zs_size_classes - 1; i >= 0; i--) {
1765 class = pool->size_class[i];
1766 if (!class)
1767 continue;
1768 if (class->index != i)
1769 continue;
1770 nr_migrated += __zs_compact(pool, class);
1771 }
1772
1773 return nr_migrated;
1774}
1775EXPORT_SYMBOL_GPL(zs_compact);
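zs_compact() walks the size classes from the largest down, draining ZS_ALMOST_EMPTY source zspages into fuller destinations and freeing each source once it is empty; the return value is the number of objects migrated. A minimal caller sketch (how this is exposed to userspace is outside this patch):

	unsigned long nr_migrated = zs_compact(pool);

	pr_debug("zsmalloc: compaction migrated %lu objects\n", nr_migrated);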
1337 1776
1338/** 1777/**
1339 * zs_create_pool - Creates an allocation pool to work from. 1778 * zs_create_pool - Creates an allocation pool to work from.
@@ -1355,20 +1794,20 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags)
1355 if (!pool) 1794 if (!pool)
1356 return NULL; 1795 return NULL;
1357 1796
1358 pool->name = kstrdup(name, GFP_KERNEL);
1359 if (!pool->name) {
1360 kfree(pool);
1361 return NULL;
1362 }
1363
1364 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), 1797 pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
1365 GFP_KERNEL); 1798 GFP_KERNEL);
1366 if (!pool->size_class) { 1799 if (!pool->size_class) {
1367 kfree(pool->name);
1368 kfree(pool); 1800 kfree(pool);
1369 return NULL; 1801 return NULL;
1370 } 1802 }
1371 1803
1804 pool->name = kstrdup(name, GFP_KERNEL);
1805 if (!pool->name)
1806 goto err;
1807
1808 if (create_handle_cache(pool))
1809 goto err;
1810
1372 /* 1811 /*
1373 * Iterate reversly, because, size of size_class that we want to use 1812 * Iterate reversly, because, size of size_class that we want to use
1374 * for merging should be larger or equal to current size. 1813 * for merging should be larger or equal to current size.
@@ -1406,6 +1845,9 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags)
1406 class->size = size; 1845 class->size = size;
1407 class->index = i; 1846 class->index = i;
1408 class->pages_per_zspage = pages_per_zspage; 1847 class->pages_per_zspage = pages_per_zspage;
1848 if (pages_per_zspage == 1 &&
1849 get_maxobj_per_zspage(size, pages_per_zspage) == 1)
1850 class->huge = true;
1409 spin_lock_init(&class->lock); 1851 spin_lock_init(&class->lock);
1410 pool->size_class[i] = class; 1852 pool->size_class[i] = class;
1411 1853
@@ -1450,6 +1892,7 @@ void zs_destroy_pool(struct zs_pool *pool)
1450 kfree(class); 1892 kfree(class);
1451 } 1893 }
1452 1894
1895 destroy_handle_cache(pool);
1453 kfree(pool->size_class); 1896 kfree(pool->size_class);
1454 kfree(pool->name); 1897 kfree(pool->name);
1455 kfree(pool); 1898 kfree(pool);