Diffstat (limited to 'mm/zsmalloc.c')
-rw-r--r-- | mm/zsmalloc.c | 971 |
1 file changed, 707 insertions, 264 deletions
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 0dec1fa5f656..08bd7a3d464a 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -12,35 +12,6 @@ | |||
12 | */ | 12 | */ |
13 | 13 | ||
14 | /* | 14 | /* |
15 | * This allocator is designed for use with zram. Thus, the allocator is | ||
16 | * supposed to work well under low memory conditions. In particular, it | ||
17 | * never attempts higher order page allocation which is very likely to | ||
18 | * fail under memory pressure. On the other hand, if we just use single | ||
19 | * (0-order) pages, it would suffer from very high fragmentation -- | ||
20 | * any object of size PAGE_SIZE/2 or larger would occupy an entire page. | ||
21 | * This was one of the major issues with its predecessor (xvmalloc). | ||
22 | * | ||
23 | * To overcome these issues, zsmalloc allocates a bunch of 0-order pages | ||
24 | * and links them together using various 'struct page' fields. These linked | ||
25 | * pages act as a single higher-order page i.e. an object can span 0-order | ||
26 | * page boundaries. The code refers to these linked pages as a single entity | ||
27 | * called zspage. | ||
28 | * | ||
29 | * For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE | ||
30 | * since this satisfies the requirements of all its current users (in the | ||
31 | * worst case, page is incompressible and is thus stored "as-is" i.e. in | ||
32 | * uncompressed form). For allocation requests larger than this size, failure | ||
33 | * is returned (see zs_malloc). | ||
34 | * | ||
35 | * Additionally, zs_malloc() does not return a dereferenceable pointer. | ||
36 | * Instead, it returns an opaque handle (unsigned long) which encodes actual | ||
37 | * location of the allocated object. The reason for this indirection is that | ||
38 | * zsmalloc does not keep zspages permanently mapped since that would cause | ||
39 | * issues on 32-bit systems where the VA region for kernel space mappings | ||
40 | * is very small. So, before using the allocating memory, the object has to | ||
41 | * be mapped using zs_map_object() to get a usable pointer and subsequently | ||
42 | * unmapped using zs_unmap_object(). | ||
43 | * | ||
44 | * Following is how we use various fields and flags of underlying | 15 | * Following is how we use various fields and flags of underlying |
45 | * struct page(s) to form a zspage. | 16 | * struct page(s) to form a zspage. |
46 | * | 17 | * |
@@ -57,6 +28,8 @@ | |||
57 | * | 28 | * |
58 | * page->private (union with page->first_page): refers to the | 29 | * page->private (union with page->first_page): refers to the |
59 | * component page after the first page | 30 | * component page after the first page |
31 | * If the page is the first_page of a zspage holding a huge object, it stores the handle. | ||
32 | * See size_class->huge. | ||
60 | * page->freelist: points to the first free object in zspage. | 33 | * page->freelist: points to the first free object in zspage. |
61 | * Free objects are linked together using in-place | 34 | * Free objects are linked together using in-place |
62 | * metadata. | 35 | * metadata. |
@@ -78,6 +51,7 @@ | |||
78 | 51 | ||
79 | #include <linux/module.h> | 52 | #include <linux/module.h> |
80 | #include <linux/kernel.h> | 53 | #include <linux/kernel.h> |
54 | #include <linux/sched.h> | ||
81 | #include <linux/bitops.h> | 55 | #include <linux/bitops.h> |
82 | #include <linux/errno.h> | 56 | #include <linux/errno.h> |
83 | #include <linux/highmem.h> | 57 | #include <linux/highmem.h> |
@@ -110,6 +84,8 @@ | |||
110 | #define ZS_MAX_ZSPAGE_ORDER 2 | 84 | #define ZS_MAX_ZSPAGE_ORDER 2 |
111 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) | 85 | #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) |
112 | 86 | ||
87 | #define ZS_HANDLE_SIZE (sizeof(unsigned long)) | ||
88 | |||
113 | /* | 89 | /* |
114 | * Object location (<PFN>, <obj_idx>) is encoded as | 90 | * Object location (<PFN>, <obj_idx>) is encoded as |
115 | * as single (unsigned long) handle value. | 91 | * as single (unsigned long) handle value. |
@@ -133,13 +109,33 @@ | |||
133 | #endif | 109 | #endif |
134 | #endif | 110 | #endif |
135 | #define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) | 111 | #define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) |
136 | #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) | 112 | |
113 | /* | ||
114 | * The memory allocated for a handle stores the object position by | ||
115 | * encoding <page, obj_idx>, and the encoded value leaves room in the | ||
116 | * least significant bit (ie, look at obj_to_location). | ||
117 | * We use that bit to synchronize object access between the | ||
118 | * user and migration. | ||
119 | */ | ||
120 | #define HANDLE_PIN_BIT 0 | ||
121 | |||
122 | /* | ||
123 | * The head of an allocated object stores OBJ_ALLOCATED_TAG so we | ||
124 | * can identify whether the object is allocated. | ||
125 | * It is okay to keep this status bit in the least significant bit | ||
126 | * because the header keeps a handle, which is a 4-byte-aligned | ||
127 | * address, so we have room for at least two bits. | ||
128 | */ | ||
129 | #define OBJ_ALLOCATED_TAG 1 | ||
130 | #define OBJ_TAG_BITS 1 | ||
131 | #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS) | ||
137 | #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) | 132 | #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) |
138 | 133 | ||
139 | #define MAX(a, b) ((a) >= (b) ? (a) : (b)) | 134 | #define MAX(a, b) ((a) >= (b) ? (a) : (b)) |
140 | /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ | 135 | /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ |
141 | #define ZS_MIN_ALLOC_SIZE \ | 136 | #define ZS_MIN_ALLOC_SIZE \ |
142 | MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) | 137 | MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) |
138 | /* each chunk includes extra space to keep handle */ | ||
143 | #define ZS_MAX_ALLOC_SIZE PAGE_SIZE | 139 | #define ZS_MAX_ALLOC_SIZE PAGE_SIZE |
144 | 140 | ||
145 | /* | 141 | /* |
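A minimal sketch of the encoding defined in the hunk above, for illustration only (user-space code with an assumed OBJ_INDEX_BITS width; the kernel derives the real width from MAX_PHYSMEM_BITS and PAGE_SHIFT): the PFN and object index are packed into one word and shifted left by OBJ_TAG_BITS, leaving the least significant bit free for OBJ_ALLOCATED_TAG and HANDLE_PIN_BIT.

    #include <assert.h>

    #define OBJ_TAG_BITS   1UL
    #define OBJ_INDEX_BITS 10UL                       /* assumed width, illustration only */
    #define OBJ_INDEX_MASK ((1UL << OBJ_INDEX_BITS) - 1)

    static unsigned long encode_obj(unsigned long pfn, unsigned long obj_idx)
    {
            unsigned long obj = (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);

            return obj << OBJ_TAG_BITS;               /* low bit stays free for tagging */
    }

    static void decode_obj(unsigned long obj, unsigned long *pfn, unsigned long *obj_idx)
    {
            obj >>= OBJ_TAG_BITS;
            *pfn = obj >> OBJ_INDEX_BITS;
            *obj_idx = obj & OBJ_INDEX_MASK;
    }

    int main(void)
    {
            unsigned long pfn, idx;

            decode_obj(encode_obj(0x1234, 7), &pfn, &idx);
            assert(pfn == 0x1234 && idx == 7);
            return 0;
    }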
@@ -172,6 +168,8 @@ enum fullness_group { | |||
172 | enum zs_stat_type { | 168 | enum zs_stat_type { |
173 | OBJ_ALLOCATED, | 169 | OBJ_ALLOCATED, |
174 | OBJ_USED, | 170 | OBJ_USED, |
171 | CLASS_ALMOST_FULL, | ||
172 | CLASS_ALMOST_EMPTY, | ||
175 | NR_ZS_STAT_TYPE, | 173 | NR_ZS_STAT_TYPE, |
176 | }; | 174 | }; |
177 | 175 | ||
@@ -216,6 +214,8 @@ struct size_class { | |||
216 | 214 | ||
217 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ | 215 | /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ |
218 | int pages_per_zspage; | 216 | int pages_per_zspage; |
217 | /* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ | ||
218 | bool huge; | ||
219 | 219 | ||
220 | #ifdef CONFIG_ZSMALLOC_STAT | 220 | #ifdef CONFIG_ZSMALLOC_STAT |
221 | struct zs_size_stat stats; | 221 | struct zs_size_stat stats; |
@@ -233,14 +233,24 @@ struct size_class { | |||
233 | * This must be power of 2 and less than or equal to ZS_ALIGN | 233 | * This must be power of 2 and less than or equal to ZS_ALIGN |
234 | */ | 234 | */ |
235 | struct link_free { | 235 | struct link_free { |
236 | /* Handle of next free chunk (encodes <PFN, obj_idx>) */ | 236 | union { |
237 | void *next; | 237 | /* |
238 | * Position of the next free chunk (encodes <PFN, obj_idx>); | ||
239 | * only valid for a non-allocated (free) object | ||
240 | */ | ||
241 | void *next; | ||
242 | /* | ||
243 | * Handle of allocated object. | ||
244 | */ | ||
245 | unsigned long handle; | ||
246 | }; | ||
238 | }; | 247 | }; |
239 | 248 | ||
240 | struct zs_pool { | 249 | struct zs_pool { |
241 | char *name; | 250 | char *name; |
242 | 251 | ||
243 | struct size_class **size_class; | 252 | struct size_class **size_class; |
253 | struct kmem_cache *handle_cachep; | ||
244 | 254 | ||
245 | gfp_t flags; /* allocation flags used when growing pool */ | 255 | gfp_t flags; /* allocation flags used when growing pool */ |
246 | atomic_long_t pages_allocated; | 256 | atomic_long_t pages_allocated; |
@@ -267,8 +277,37 @@ struct mapping_area { | |||
267 | #endif | 277 | #endif |
268 | char *vm_addr; /* address of kmap_atomic()'ed pages */ | 278 | char *vm_addr; /* address of kmap_atomic()'ed pages */ |
269 | enum zs_mapmode vm_mm; /* mapping mode */ | 279 | enum zs_mapmode vm_mm; /* mapping mode */ |
280 | bool huge; | ||
270 | }; | 281 | }; |
271 | 282 | ||
283 | static int create_handle_cache(struct zs_pool *pool) | ||
284 | { | ||
285 | pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE, | ||
286 | 0, 0, NULL); | ||
287 | return pool->handle_cachep ? 0 : 1; | ||
288 | } | ||
289 | |||
290 | static void destroy_handle_cache(struct zs_pool *pool) | ||
291 | { | ||
292 | kmem_cache_destroy(pool->handle_cachep); | ||
293 | } | ||
294 | |||
295 | static unsigned long alloc_handle(struct zs_pool *pool) | ||
296 | { | ||
297 | return (unsigned long)kmem_cache_alloc(pool->handle_cachep, | ||
298 | pool->flags & ~__GFP_HIGHMEM); | ||
299 | } | ||
300 | |||
301 | static void free_handle(struct zs_pool *pool, unsigned long handle) | ||
302 | { | ||
303 | kmem_cache_free(pool->handle_cachep, (void *)handle); | ||
304 | } | ||
305 | |||
306 | static void record_obj(unsigned long handle, unsigned long obj) | ||
307 | { | ||
308 | *(unsigned long *)handle = obj; | ||
309 | } | ||
310 | |||
272 | /* zpool driver */ | 311 | /* zpool driver */ |
273 | 312 | ||
274 | #ifdef CONFIG_ZPOOL | 313 | #ifdef CONFIG_ZPOOL |
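The helpers added above give zs_malloc() a level of indirection: the handle returned to the caller is the address of a small slab slot, and the slot holds the encoded object location, which migration can later rewrite without invalidating the handle. A user-space sketch of that indirection (malloc() stands in for the kmem_cache, and the location values are made up):

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned long alloc_handle(void)
    {
            return (unsigned long)malloc(sizeof(unsigned long));
    }

    static void record_obj(unsigned long handle, unsigned long obj)
    {
            *(unsigned long *)handle = obj;           /* handle slot now refers to obj */
    }

    static unsigned long handle_to_obj(unsigned long handle)
    {
            return *(unsigned long *)handle;
    }

    int main(void)
    {
            unsigned long handle = alloc_handle();

            if (!handle)
                    return 1;
            record_obj(handle, 0xabcd0);              /* location picked at allocation */
            record_obj(handle, 0xbeef0);              /* "migration" moved the object  */
            printf("current obj = %#lx\n", handle_to_obj(handle));
            free((void *)handle);
            return 0;
    }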
@@ -346,6 +385,11 @@ static struct zpool_driver zs_zpool_driver = { | |||
346 | MODULE_ALIAS("zpool-zsmalloc"); | 385 | MODULE_ALIAS("zpool-zsmalloc"); |
347 | #endif /* CONFIG_ZPOOL */ | 386 | #endif /* CONFIG_ZPOOL */ |
348 | 387 | ||
388 | static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) | ||
389 | { | ||
390 | return pages_per_zspage * PAGE_SIZE / size; | ||
391 | } | ||
392 | |||
349 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ | 393 | /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ |
350 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); | 394 | static DEFINE_PER_CPU(struct mapping_area, zs_map_area); |
351 | 395 | ||
@@ -396,9 +440,182 @@ static int get_size_class_index(int size) | |||
396 | idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, | 440 | idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, |
397 | ZS_SIZE_CLASS_DELTA); | 441 | ZS_SIZE_CLASS_DELTA); |
398 | 442 | ||
399 | return idx; | 443 | return min(zs_size_classes - 1, idx); |
444 | } | ||
445 | |||
446 | #ifdef CONFIG_ZSMALLOC_STAT | ||
447 | |||
448 | static inline void zs_stat_inc(struct size_class *class, | ||
449 | enum zs_stat_type type, unsigned long cnt) | ||
450 | { | ||
451 | class->stats.objs[type] += cnt; | ||
452 | } | ||
453 | |||
454 | static inline void zs_stat_dec(struct size_class *class, | ||
455 | enum zs_stat_type type, unsigned long cnt) | ||
456 | { | ||
457 | class->stats.objs[type] -= cnt; | ||
458 | } | ||
459 | |||
460 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
461 | enum zs_stat_type type) | ||
462 | { | ||
463 | return class->stats.objs[type]; | ||
464 | } | ||
465 | |||
466 | static int __init zs_stat_init(void) | ||
467 | { | ||
468 | if (!debugfs_initialized()) | ||
469 | return -ENODEV; | ||
470 | |||
471 | zs_stat_root = debugfs_create_dir("zsmalloc", NULL); | ||
472 | if (!zs_stat_root) | ||
473 | return -ENOMEM; | ||
474 | |||
475 | return 0; | ||
476 | } | ||
477 | |||
478 | static void __exit zs_stat_exit(void) | ||
479 | { | ||
480 | debugfs_remove_recursive(zs_stat_root); | ||
481 | } | ||
482 | |||
483 | static int zs_stats_size_show(struct seq_file *s, void *v) | ||
484 | { | ||
485 | int i; | ||
486 | struct zs_pool *pool = s->private; | ||
487 | struct size_class *class; | ||
488 | int objs_per_zspage; | ||
489 | unsigned long class_almost_full, class_almost_empty; | ||
490 | unsigned long obj_allocated, obj_used, pages_used; | ||
491 | unsigned long total_class_almost_full = 0, total_class_almost_empty = 0; | ||
492 | unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0; | ||
493 | |||
494 | seq_printf(s, " %5s %5s %11s %12s %13s %10s %10s %16s\n", | ||
495 | "class", "size", "almost_full", "almost_empty", | ||
496 | "obj_allocated", "obj_used", "pages_used", | ||
497 | "pages_per_zspage"); | ||
498 | |||
499 | for (i = 0; i < zs_size_classes; i++) { | ||
500 | class = pool->size_class[i]; | ||
501 | |||
502 | if (class->index != i) | ||
503 | continue; | ||
504 | |||
505 | spin_lock(&class->lock); | ||
506 | class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL); | ||
507 | class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY); | ||
508 | obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); | ||
509 | obj_used = zs_stat_get(class, OBJ_USED); | ||
510 | spin_unlock(&class->lock); | ||
511 | |||
512 | objs_per_zspage = get_maxobj_per_zspage(class->size, | ||
513 | class->pages_per_zspage); | ||
514 | pages_used = obj_allocated / objs_per_zspage * | ||
515 | class->pages_per_zspage; | ||
516 | |||
517 | seq_printf(s, " %5u %5u %11lu %12lu %13lu %10lu %10lu %16d\n", | ||
518 | i, class->size, class_almost_full, class_almost_empty, | ||
519 | obj_allocated, obj_used, pages_used, | ||
520 | class->pages_per_zspage); | ||
521 | |||
522 | total_class_almost_full += class_almost_full; | ||
523 | total_class_almost_empty += class_almost_empty; | ||
524 | total_objs += obj_allocated; | ||
525 | total_used_objs += obj_used; | ||
526 | total_pages += pages_used; | ||
527 | } | ||
528 | |||
529 | seq_puts(s, "\n"); | ||
530 | seq_printf(s, " %5s %5s %11lu %12lu %13lu %10lu %10lu\n", | ||
531 | "Total", "", total_class_almost_full, | ||
532 | total_class_almost_empty, total_objs, | ||
533 | total_used_objs, total_pages); | ||
534 | |||
535 | return 0; | ||
536 | } | ||
537 | |||
538 | static int zs_stats_size_open(struct inode *inode, struct file *file) | ||
539 | { | ||
540 | return single_open(file, zs_stats_size_show, inode->i_private); | ||
541 | } | ||
542 | |||
543 | static const struct file_operations zs_stat_size_ops = { | ||
544 | .open = zs_stats_size_open, | ||
545 | .read = seq_read, | ||
546 | .llseek = seq_lseek, | ||
547 | .release = single_release, | ||
548 | }; | ||
549 | |||
550 | static int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
551 | { | ||
552 | struct dentry *entry; | ||
553 | |||
554 | if (!zs_stat_root) | ||
555 | return -ENODEV; | ||
556 | |||
557 | entry = debugfs_create_dir(name, zs_stat_root); | ||
558 | if (!entry) { | ||
559 | pr_warn("debugfs dir <%s> creation failed\n", name); | ||
560 | return -ENOMEM; | ||
561 | } | ||
562 | pool->stat_dentry = entry; | ||
563 | |||
564 | entry = debugfs_create_file("classes", S_IFREG | S_IRUGO, | ||
565 | pool->stat_dentry, pool, &zs_stat_size_ops); | ||
566 | if (!entry) { | ||
567 | pr_warn("%s: debugfs file entry <%s> creation failed\n", | ||
568 | name, "classes"); | ||
569 | return -ENOMEM; | ||
570 | } | ||
571 | |||
572 | return 0; | ||
573 | } | ||
574 | |||
575 | static void zs_pool_stat_destroy(struct zs_pool *pool) | ||
576 | { | ||
577 | debugfs_remove_recursive(pool->stat_dentry); | ||
578 | } | ||
579 | |||
580 | #else /* CONFIG_ZSMALLOC_STAT */ | ||
581 | |||
582 | static inline void zs_stat_inc(struct size_class *class, | ||
583 | enum zs_stat_type type, unsigned long cnt) | ||
584 | { | ||
585 | } | ||
586 | |||
587 | static inline void zs_stat_dec(struct size_class *class, | ||
588 | enum zs_stat_type type, unsigned long cnt) | ||
589 | { | ||
590 | } | ||
591 | |||
592 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
593 | enum zs_stat_type type) | ||
594 | { | ||
595 | return 0; | ||
596 | } | ||
597 | |||
598 | static int __init zs_stat_init(void) | ||
599 | { | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | static void __exit zs_stat_exit(void) | ||
604 | { | ||
605 | } | ||
606 | |||
607 | static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
608 | { | ||
609 | return 0; | ||
610 | } | ||
611 | |||
612 | static inline void zs_pool_stat_destroy(struct zs_pool *pool) | ||
613 | { | ||
400 | } | 614 | } |
401 | 615 | ||
616 | #endif | ||
617 | |||
618 | |||
402 | /* | 619 | /* |
403 | * For each size class, zspages are divided into different groups | 620 | * For each size class, zspages are divided into different groups |
404 | * depending on how "full" they are. This was done so that we could | 621 | * depending on how "full" they are. This was done so that we could |
@@ -419,7 +636,7 @@ static enum fullness_group get_fullness_group(struct page *page) | |||
419 | fg = ZS_EMPTY; | 636 | fg = ZS_EMPTY; |
420 | else if (inuse == max_objects) | 637 | else if (inuse == max_objects) |
421 | fg = ZS_FULL; | 638 | fg = ZS_FULL; |
422 | else if (inuse <= max_objects / fullness_threshold_frac) | 639 | else if (inuse <= 3 * max_objects / fullness_threshold_frac) |
423 | fg = ZS_ALMOST_EMPTY; | 640 | fg = ZS_ALMOST_EMPTY; |
424 | else | 641 | else |
425 | fg = ZS_ALMOST_FULL; | 642 | fg = ZS_ALMOST_FULL; |
@@ -448,6 +665,8 @@ static void insert_zspage(struct page *page, struct size_class *class, | |||
448 | list_add_tail(&page->lru, &(*head)->lru); | 665 | list_add_tail(&page->lru, &(*head)->lru); |
449 | 666 | ||
450 | *head = page; | 667 | *head = page; |
668 | zs_stat_inc(class, fullness == ZS_ALMOST_EMPTY ? | ||
669 | CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1); | ||
451 | } | 670 | } |
452 | 671 | ||
453 | /* | 672 | /* |
@@ -473,6 +692,8 @@ static void remove_zspage(struct page *page, struct size_class *class, | |||
473 | struct page, lru); | 692 | struct page, lru); |
474 | 693 | ||
475 | list_del_init(&page->lru); | 694 | list_del_init(&page->lru); |
695 | zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ? | ||
696 | CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1); | ||
476 | } | 697 | } |
477 | 698 | ||
478 | /* | 699 | /* |
@@ -484,11 +705,10 @@ static void remove_zspage(struct page *page, struct size_class *class, | |||
484 | * page from the freelist of the old fullness group to that of the new | 705 | * page from the freelist of the old fullness group to that of the new |
485 | * fullness group. | 706 | * fullness group. |
486 | */ | 707 | */ |
487 | static enum fullness_group fix_fullness_group(struct zs_pool *pool, | 708 | static enum fullness_group fix_fullness_group(struct size_class *class, |
488 | struct page *page) | 709 | struct page *page) |
489 | { | 710 | { |
490 | int class_idx; | 711 | int class_idx; |
491 | struct size_class *class; | ||
492 | enum fullness_group currfg, newfg; | 712 | enum fullness_group currfg, newfg; |
493 | 713 | ||
494 | BUG_ON(!is_first_page(page)); | 714 | BUG_ON(!is_first_page(page)); |
@@ -498,7 +718,6 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool, | |||
498 | if (newfg == currfg) | 718 | if (newfg == currfg) |
499 | goto out; | 719 | goto out; |
500 | 720 | ||
501 | class = pool->size_class[class_idx]; | ||
502 | remove_zspage(page, class, currfg); | 721 | remove_zspage(page, class, currfg); |
503 | insert_zspage(page, class, newfg); | 722 | insert_zspage(page, class, newfg); |
504 | set_zspage_mapping(page, class_idx, newfg); | 723 | set_zspage_mapping(page, class_idx, newfg); |
@@ -512,7 +731,8 @@ out: | |||
512 | * to form a zspage for each size class. This is important | 731 | * to form a zspage for each size class. This is important |
513 | * to reduce wastage due to unusable space left at end of | 732 | * to reduce wastage due to unusable space left at end of |
514 | * each zspage which is given as: | 733 | * each zspage which is given as: |
515 | * wastage = Zp - Zp % size_class | 734 | * wastage = Zp % class_size |
735 | * usage = Zp - wastage | ||
516 | * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ... | 736 | * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ... |
517 | * | 737 | * |
518 | * For example, for size class of 3/8 * PAGE_SIZE, we should | 738 | * For example, for size class of 3/8 * PAGE_SIZE, we should |
@@ -571,35 +791,50 @@ static struct page *get_next_page(struct page *page) | |||
571 | 791 | ||
572 | /* | 792 | /* |
573 | * Encode <page, obj_idx> as a single handle value. | 793 | * Encode <page, obj_idx> as a single handle value. |
574 | * On hardware platforms with physical memory starting at 0x0 the pfn | 794 | * We use the least bit of handle for tagging. |
575 | * could be 0 so we ensure that the handle will never be 0 by adjusting the | ||
576 | * encoded obj_idx value before encoding. | ||
577 | */ | 795 | */ |
578 | static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) | 796 | static void *location_to_obj(struct page *page, unsigned long obj_idx) |
579 | { | 797 | { |
580 | unsigned long handle; | 798 | unsigned long obj; |
581 | 799 | ||
582 | if (!page) { | 800 | if (!page) { |
583 | BUG_ON(obj_idx); | 801 | BUG_ON(obj_idx); |
584 | return NULL; | 802 | return NULL; |
585 | } | 803 | } |
586 | 804 | ||
587 | handle = page_to_pfn(page) << OBJ_INDEX_BITS; | 805 | obj = page_to_pfn(page) << OBJ_INDEX_BITS; |
588 | handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); | 806 | obj |= ((obj_idx) & OBJ_INDEX_MASK); |
807 | obj <<= OBJ_TAG_BITS; | ||
589 | 808 | ||
590 | return (void *)handle; | 809 | return (void *)obj; |
591 | } | 810 | } |
592 | 811 | ||
593 | /* | 812 | /* |
594 | * Decode <page, obj_idx> pair from the given object handle. We adjust the | 813 | * Decode <page, obj_idx> pair from the given object handle. We adjust the |
595 | * decoded obj_idx back to its original value since it was adjusted in | 814 | * decoded obj_idx back to its original value since it was adjusted in |
596 | * obj_location_to_handle(). | 815 | * location_to_obj(). |
597 | */ | 816 | */ |
598 | static void obj_handle_to_location(unsigned long handle, struct page **page, | 817 | static void obj_to_location(unsigned long obj, struct page **page, |
599 | unsigned long *obj_idx) | 818 | unsigned long *obj_idx) |
600 | { | 819 | { |
601 | *page = pfn_to_page(handle >> OBJ_INDEX_BITS); | 820 | obj >>= OBJ_TAG_BITS; |
602 | *obj_idx = (handle & OBJ_INDEX_MASK) - 1; | 821 | *page = pfn_to_page(obj >> OBJ_INDEX_BITS); |
822 | *obj_idx = (obj & OBJ_INDEX_MASK); | ||
823 | } | ||
824 | |||
825 | static unsigned long handle_to_obj(unsigned long handle) | ||
826 | { | ||
827 | return *(unsigned long *)handle; | ||
828 | } | ||
829 | |||
830 | static unsigned long obj_to_head(struct size_class *class, struct page *page, | ||
831 | void *obj) | ||
832 | { | ||
833 | if (class->huge) { | ||
834 | VM_BUG_ON(!is_first_page(page)); | ||
835 | return *(unsigned long *)page_private(page); | ||
836 | } else | ||
837 | return *(unsigned long *)obj; | ||
603 | } | 838 | } |
604 | 839 | ||
605 | static unsigned long obj_idx_to_offset(struct page *page, | 840 | static unsigned long obj_idx_to_offset(struct page *page, |
@@ -613,6 +848,25 @@ static unsigned long obj_idx_to_offset(struct page *page, | |||
613 | return off + obj_idx * class_size; | 848 | return off + obj_idx * class_size; |
614 | } | 849 | } |
615 | 850 | ||
851 | static inline int trypin_tag(unsigned long handle) | ||
852 | { | ||
853 | unsigned long *ptr = (unsigned long *)handle; | ||
854 | |||
855 | return !test_and_set_bit_lock(HANDLE_PIN_BIT, ptr); | ||
856 | } | ||
857 | |||
858 | static void pin_tag(unsigned long handle) | ||
859 | { | ||
860 | while (!trypin_tag(handle)); | ||
861 | } | ||
862 | |||
863 | static void unpin_tag(unsigned long handle) | ||
864 | { | ||
865 | unsigned long *ptr = (unsigned long *)handle; | ||
866 | |||
867 | clear_bit_unlock(HANDLE_PIN_BIT, ptr); | ||
868 | } | ||
869 | |||
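A sketch of how the pin helpers above are intended to be used (user_access() and migrator_try_move() are hypothetical names mirroring the zs_map_object()/zs_unmap_object() and find_alloced_obj() callers added later in this patch):

    static void user_access(unsigned long handle)
    {
            pin_tag(handle);        /* migration cannot move this object now */
            /* ... map and read/write the object ... */
            unpin_tag(handle);      /* compaction may move it again */
    }

    static bool migrator_try_move(unsigned long handle)
    {
            if (!trypin_tag(handle))
                    return false;   /* object is being accessed; skip it this pass */
            /* ... copy the object, then record_obj(handle, new_location) ... */
            unpin_tag(handle);
            return true;
    }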
616 | static void reset_page(struct page *page) | 870 | static void reset_page(struct page *page) |
617 | { | 871 | { |
618 | clear_bit(PG_private, &page->flags); | 872 | clear_bit(PG_private, &page->flags); |
@@ -674,7 +928,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) | |||
674 | link = (struct link_free *)vaddr + off / sizeof(*link); | 928 | link = (struct link_free *)vaddr + off / sizeof(*link); |
675 | 929 | ||
676 | while ((off += class->size) < PAGE_SIZE) { | 930 | while ((off += class->size) < PAGE_SIZE) { |
677 | link->next = obj_location_to_handle(page, i++); | 931 | link->next = location_to_obj(page, i++); |
678 | link += class->size / sizeof(*link); | 932 | link += class->size / sizeof(*link); |
679 | } | 933 | } |
680 | 934 | ||
@@ -684,7 +938,7 @@ static void init_zspage(struct page *first_page, struct size_class *class) | |||
684 | * page (if present) | 938 | * page (if present) |
685 | */ | 939 | */ |
686 | next_page = get_next_page(page); | 940 | next_page = get_next_page(page); |
687 | link->next = obj_location_to_handle(next_page, 0); | 941 | link->next = location_to_obj(next_page, 0); |
688 | kunmap_atomic(vaddr); | 942 | kunmap_atomic(vaddr); |
689 | page = next_page; | 943 | page = next_page; |
690 | off %= PAGE_SIZE; | 944 | off %= PAGE_SIZE; |
@@ -738,7 +992,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags) | |||
738 | 992 | ||
739 | init_zspage(first_page, class); | 993 | init_zspage(first_page, class); |
740 | 994 | ||
741 | first_page->freelist = obj_location_to_handle(first_page, 0); | 995 | first_page->freelist = location_to_obj(first_page, 0); |
742 | /* Maximum number of objects we can store in this zspage */ | 996 | /* Maximum number of objects we can store in this zspage */ |
743 | first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; | 997 | first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; |
744 | 998 | ||
@@ -860,12 +1114,19 @@ static void __zs_unmap_object(struct mapping_area *area, | |||
860 | { | 1114 | { |
861 | int sizes[2]; | 1115 | int sizes[2]; |
862 | void *addr; | 1116 | void *addr; |
863 | char *buf = area->vm_buf; | 1117 | char *buf; |
864 | 1118 | ||
865 | /* no write fastpath */ | 1119 | /* no write fastpath */ |
866 | if (area->vm_mm == ZS_MM_RO) | 1120 | if (area->vm_mm == ZS_MM_RO) |
867 | goto out; | 1121 | goto out; |
868 | 1122 | ||
1123 | buf = area->vm_buf; | ||
1124 | if (!area->huge) { | ||
1125 | buf = buf + ZS_HANDLE_SIZE; | ||
1126 | size -= ZS_HANDLE_SIZE; | ||
1127 | off += ZS_HANDLE_SIZE; | ||
1128 | } | ||
1129 | |||
869 | sizes[0] = PAGE_SIZE - off; | 1130 | sizes[0] = PAGE_SIZE - off; |
870 | sizes[1] = size - sizes[0]; | 1131 | sizes[1] = size - sizes[0]; |
871 | 1132 | ||
@@ -952,11 +1213,6 @@ static void init_zs_size_classes(void) | |||
952 | zs_size_classes = nr; | 1213 | zs_size_classes = nr; |
953 | } | 1214 | } |
954 | 1215 | ||
955 | static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) | ||
956 | { | ||
957 | return pages_per_zspage * PAGE_SIZE / size; | ||
958 | } | ||
959 | |||
960 | static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) | 1216 | static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) |
961 | { | 1217 | { |
962 | if (prev->pages_per_zspage != pages_per_zspage) | 1218 | if (prev->pages_per_zspage != pages_per_zspage) |
@@ -969,166 +1225,13 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) | |||
969 | return true; | 1225 | return true; |
970 | } | 1226 | } |
971 | 1227 | ||
972 | #ifdef CONFIG_ZSMALLOC_STAT | 1228 | static bool zspage_full(struct page *page) |
973 | |||
974 | static inline void zs_stat_inc(struct size_class *class, | ||
975 | enum zs_stat_type type, unsigned long cnt) | ||
976 | { | ||
977 | class->stats.objs[type] += cnt; | ||
978 | } | ||
979 | |||
980 | static inline void zs_stat_dec(struct size_class *class, | ||
981 | enum zs_stat_type type, unsigned long cnt) | ||
982 | { | ||
983 | class->stats.objs[type] -= cnt; | ||
984 | } | ||
985 | |||
986 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
987 | enum zs_stat_type type) | ||
988 | { | ||
989 | return class->stats.objs[type]; | ||
990 | } | ||
991 | |||
992 | static int __init zs_stat_init(void) | ||
993 | { | ||
994 | if (!debugfs_initialized()) | ||
995 | return -ENODEV; | ||
996 | |||
997 | zs_stat_root = debugfs_create_dir("zsmalloc", NULL); | ||
998 | if (!zs_stat_root) | ||
999 | return -ENOMEM; | ||
1000 | |||
1001 | return 0; | ||
1002 | } | ||
1003 | |||
1004 | static void __exit zs_stat_exit(void) | ||
1005 | { | ||
1006 | debugfs_remove_recursive(zs_stat_root); | ||
1007 | } | ||
1008 | |||
1009 | static int zs_stats_size_show(struct seq_file *s, void *v) | ||
1010 | { | 1229 | { |
1011 | int i; | 1230 | BUG_ON(!is_first_page(page)); |
1012 | struct zs_pool *pool = s->private; | ||
1013 | struct size_class *class; | ||
1014 | int objs_per_zspage; | ||
1015 | unsigned long obj_allocated, obj_used, pages_used; | ||
1016 | unsigned long total_objs = 0, total_used_objs = 0, total_pages = 0; | ||
1017 | |||
1018 | seq_printf(s, " %5s %5s %13s %10s %10s\n", "class", "size", | ||
1019 | "obj_allocated", "obj_used", "pages_used"); | ||
1020 | |||
1021 | for (i = 0; i < zs_size_classes; i++) { | ||
1022 | class = pool->size_class[i]; | ||
1023 | |||
1024 | if (class->index != i) | ||
1025 | continue; | ||
1026 | |||
1027 | spin_lock(&class->lock); | ||
1028 | obj_allocated = zs_stat_get(class, OBJ_ALLOCATED); | ||
1029 | obj_used = zs_stat_get(class, OBJ_USED); | ||
1030 | spin_unlock(&class->lock); | ||
1031 | |||
1032 | objs_per_zspage = get_maxobj_per_zspage(class->size, | ||
1033 | class->pages_per_zspage); | ||
1034 | pages_used = obj_allocated / objs_per_zspage * | ||
1035 | class->pages_per_zspage; | ||
1036 | |||
1037 | seq_printf(s, " %5u %5u %10lu %10lu %10lu\n", i, | ||
1038 | class->size, obj_allocated, obj_used, pages_used); | ||
1039 | |||
1040 | total_objs += obj_allocated; | ||
1041 | total_used_objs += obj_used; | ||
1042 | total_pages += pages_used; | ||
1043 | } | ||
1044 | |||
1045 | seq_puts(s, "\n"); | ||
1046 | seq_printf(s, " %5s %5s %10lu %10lu %10lu\n", "Total", "", | ||
1047 | total_objs, total_used_objs, total_pages); | ||
1048 | |||
1049 | return 0; | ||
1050 | } | ||
1051 | |||
1052 | static int zs_stats_size_open(struct inode *inode, struct file *file) | ||
1053 | { | ||
1054 | return single_open(file, zs_stats_size_show, inode->i_private); | ||
1055 | } | ||
1056 | |||
1057 | static const struct file_operations zs_stat_size_ops = { | ||
1058 | .open = zs_stats_size_open, | ||
1059 | .read = seq_read, | ||
1060 | .llseek = seq_lseek, | ||
1061 | .release = single_release, | ||
1062 | }; | ||
1063 | |||
1064 | static int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
1065 | { | ||
1066 | struct dentry *entry; | ||
1067 | |||
1068 | if (!zs_stat_root) | ||
1069 | return -ENODEV; | ||
1070 | |||
1071 | entry = debugfs_create_dir(name, zs_stat_root); | ||
1072 | if (!entry) { | ||
1073 | pr_warn("debugfs dir <%s> creation failed\n", name); | ||
1074 | return -ENOMEM; | ||
1075 | } | ||
1076 | pool->stat_dentry = entry; | ||
1077 | |||
1078 | entry = debugfs_create_file("obj_in_classes", S_IFREG | S_IRUGO, | ||
1079 | pool->stat_dentry, pool, &zs_stat_size_ops); | ||
1080 | if (!entry) { | ||
1081 | pr_warn("%s: debugfs file entry <%s> creation failed\n", | ||
1082 | name, "obj_in_classes"); | ||
1083 | return -ENOMEM; | ||
1084 | } | ||
1085 | |||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | static void zs_pool_stat_destroy(struct zs_pool *pool) | ||
1090 | { | ||
1091 | debugfs_remove_recursive(pool->stat_dentry); | ||
1092 | } | ||
1093 | |||
1094 | #else /* CONFIG_ZSMALLOC_STAT */ | ||
1095 | |||
1096 | static inline void zs_stat_inc(struct size_class *class, | ||
1097 | enum zs_stat_type type, unsigned long cnt) | ||
1098 | { | ||
1099 | } | ||
1100 | |||
1101 | static inline void zs_stat_dec(struct size_class *class, | ||
1102 | enum zs_stat_type type, unsigned long cnt) | ||
1103 | { | ||
1104 | } | ||
1105 | |||
1106 | static inline unsigned long zs_stat_get(struct size_class *class, | ||
1107 | enum zs_stat_type type) | ||
1108 | { | ||
1109 | return 0; | ||
1110 | } | ||
1111 | |||
1112 | static int __init zs_stat_init(void) | ||
1113 | { | ||
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | static void __exit zs_stat_exit(void) | ||
1118 | { | ||
1119 | } | ||
1120 | |||
1121 | static inline int zs_pool_stat_create(char *name, struct zs_pool *pool) | ||
1122 | { | ||
1123 | return 0; | ||
1124 | } | ||
1125 | 1231 | ||
1126 | static inline void zs_pool_stat_destroy(struct zs_pool *pool) | 1232 | return page->inuse == page->objects; |
1127 | { | ||
1128 | } | 1233 | } |
1129 | 1234 | ||
1130 | #endif | ||
1131 | |||
1132 | unsigned long zs_get_total_pages(struct zs_pool *pool) | 1235 | unsigned long zs_get_total_pages(struct zs_pool *pool) |
1133 | { | 1236 | { |
1134 | return atomic_long_read(&pool->pages_allocated); | 1237 | return atomic_long_read(&pool->pages_allocated); |
@@ -1153,13 +1256,14 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |||
1153 | enum zs_mapmode mm) | 1256 | enum zs_mapmode mm) |
1154 | { | 1257 | { |
1155 | struct page *page; | 1258 | struct page *page; |
1156 | unsigned long obj_idx, off; | 1259 | unsigned long obj, obj_idx, off; |
1157 | 1260 | ||
1158 | unsigned int class_idx; | 1261 | unsigned int class_idx; |
1159 | enum fullness_group fg; | 1262 | enum fullness_group fg; |
1160 | struct size_class *class; | 1263 | struct size_class *class; |
1161 | struct mapping_area *area; | 1264 | struct mapping_area *area; |
1162 | struct page *pages[2]; | 1265 | struct page *pages[2]; |
1266 | void *ret; | ||
1163 | 1267 | ||
1164 | BUG_ON(!handle); | 1268 | BUG_ON(!handle); |
1165 | 1269 | ||
@@ -1170,7 +1274,11 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |||
1170 | */ | 1274 | */ |
1171 | BUG_ON(in_interrupt()); | 1275 | BUG_ON(in_interrupt()); |
1172 | 1276 | ||
1173 | obj_handle_to_location(handle, &page, &obj_idx); | 1277 | /* From now on, migration cannot move the object */ |
1278 | pin_tag(handle); | ||
1279 | |||
1280 | obj = handle_to_obj(handle); | ||
1281 | obj_to_location(obj, &page, &obj_idx); | ||
1174 | get_zspage_mapping(get_first_page(page), &class_idx, &fg); | 1282 | get_zspage_mapping(get_first_page(page), &class_idx, &fg); |
1175 | class = pool->size_class[class_idx]; | 1283 | class = pool->size_class[class_idx]; |
1176 | off = obj_idx_to_offset(page, obj_idx, class->size); | 1284 | off = obj_idx_to_offset(page, obj_idx, class->size); |
@@ -1180,7 +1288,8 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |||
1180 | if (off + class->size <= PAGE_SIZE) { | 1288 | if (off + class->size <= PAGE_SIZE) { |
1181 | /* this object is contained entirely within a page */ | 1289 | /* this object is contained entirely within a page */ |
1182 | area->vm_addr = kmap_atomic(page); | 1290 | area->vm_addr = kmap_atomic(page); |
1183 | return area->vm_addr + off; | 1291 | ret = area->vm_addr + off; |
1292 | goto out; | ||
1184 | } | 1293 | } |
1185 | 1294 | ||
1186 | /* this object spans two pages */ | 1295 | /* this object spans two pages */ |
@@ -1188,14 +1297,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, | |||
1188 | pages[1] = get_next_page(page); | 1297 | pages[1] = get_next_page(page); |
1189 | BUG_ON(!pages[1]); | 1298 | BUG_ON(!pages[1]); |
1190 | 1299 | ||
1191 | return __zs_map_object(area, pages, off, class->size); | 1300 | ret = __zs_map_object(area, pages, off, class->size); |
1301 | out: | ||
1302 | if (!class->huge) | ||
1303 | ret += ZS_HANDLE_SIZE; | ||
1304 | |||
1305 | return ret; | ||
1192 | } | 1306 | } |
1193 | EXPORT_SYMBOL_GPL(zs_map_object); | 1307 | EXPORT_SYMBOL_GPL(zs_map_object); |
1194 | 1308 | ||
1195 | void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | 1309 | void zs_unmap_object(struct zs_pool *pool, unsigned long handle) |
1196 | { | 1310 | { |
1197 | struct page *page; | 1311 | struct page *page; |
1198 | unsigned long obj_idx, off; | 1312 | unsigned long obj, obj_idx, off; |
1199 | 1313 | ||
1200 | unsigned int class_idx; | 1314 | unsigned int class_idx; |
1201 | enum fullness_group fg; | 1315 | enum fullness_group fg; |
@@ -1204,7 +1318,8 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |||
1204 | 1318 | ||
1205 | BUG_ON(!handle); | 1319 | BUG_ON(!handle); |
1206 | 1320 | ||
1207 | obj_handle_to_location(handle, &page, &obj_idx); | 1321 | obj = handle_to_obj(handle); |
1322 | obj_to_location(obj, &page, &obj_idx); | ||
1208 | get_zspage_mapping(get_first_page(page), &class_idx, &fg); | 1323 | get_zspage_mapping(get_first_page(page), &class_idx, &fg); |
1209 | class = pool->size_class[class_idx]; | 1324 | class = pool->size_class[class_idx]; |
1210 | off = obj_idx_to_offset(page, obj_idx, class->size); | 1325 | off = obj_idx_to_offset(page, obj_idx, class->size); |
@@ -1222,9 +1337,42 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) | |||
1222 | __zs_unmap_object(area, pages, off, class->size); | 1337 | __zs_unmap_object(area, pages, off, class->size); |
1223 | } | 1338 | } |
1224 | put_cpu_var(zs_map_area); | 1339 | put_cpu_var(zs_map_area); |
1340 | unpin_tag(handle); | ||
1225 | } | 1341 | } |
1226 | EXPORT_SYMBOL_GPL(zs_unmap_object); | 1342 | EXPORT_SYMBOL_GPL(zs_unmap_object); |
1227 | 1343 | ||
1344 | static unsigned long obj_malloc(struct page *first_page, | ||
1345 | struct size_class *class, unsigned long handle) | ||
1346 | { | ||
1347 | unsigned long obj; | ||
1348 | struct link_free *link; | ||
1349 | |||
1350 | struct page *m_page; | ||
1351 | unsigned long m_objidx, m_offset; | ||
1352 | void *vaddr; | ||
1353 | |||
1354 | handle |= OBJ_ALLOCATED_TAG; | ||
1355 | obj = (unsigned long)first_page->freelist; | ||
1356 | obj_to_location(obj, &m_page, &m_objidx); | ||
1357 | m_offset = obj_idx_to_offset(m_page, m_objidx, class->size); | ||
1358 | |||
1359 | vaddr = kmap_atomic(m_page); | ||
1360 | link = (struct link_free *)vaddr + m_offset / sizeof(*link); | ||
1361 | first_page->freelist = link->next; | ||
1362 | if (!class->huge) | ||
1363 | /* record handle in the header of allocated chunk */ | ||
1364 | link->handle = handle; | ||
1365 | else | ||
1366 | /* record handle in first_page->private */ | ||
1367 | set_page_private(first_page, handle); | ||
1368 | kunmap_atomic(vaddr); | ||
1369 | first_page->inuse++; | ||
1370 | zs_stat_inc(class, OBJ_USED, 1); | ||
1371 | |||
1372 | return obj; | ||
1373 | } | ||
1374 | |||
1375 | |||
1228 | /** | 1376 | /** |
1229 | * zs_malloc - Allocate block of given size from pool. | 1377 | * zs_malloc - Allocate block of given size from pool. |
1230 | * @pool: pool to allocate from | 1378 | * @pool: pool to allocate from |
@@ -1236,17 +1384,19 @@ EXPORT_SYMBOL_GPL(zs_unmap_object); | |||
1236 | */ | 1384 | */ |
1237 | unsigned long zs_malloc(struct zs_pool *pool, size_t size) | 1385 | unsigned long zs_malloc(struct zs_pool *pool, size_t size) |
1238 | { | 1386 | { |
1239 | unsigned long obj; | 1387 | unsigned long handle, obj; |
1240 | struct link_free *link; | ||
1241 | struct size_class *class; | 1388 | struct size_class *class; |
1242 | void *vaddr; | 1389 | struct page *first_page; |
1243 | |||
1244 | struct page *first_page, *m_page; | ||
1245 | unsigned long m_objidx, m_offset; | ||
1246 | 1390 | ||
1247 | if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) | 1391 | if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) |
1248 | return 0; | 1392 | return 0; |
1249 | 1393 | ||
1394 | handle = alloc_handle(pool); | ||
1395 | if (!handle) | ||
1396 | return 0; | ||
1397 | |||
1398 | /* extra space in chunk to keep the handle */ | ||
1399 | size += ZS_HANDLE_SIZE; | ||
1250 | class = pool->size_class[get_size_class_index(size)]; | 1400 | class = pool->size_class[get_size_class_index(size)]; |
1251 | 1401 | ||
1252 | spin_lock(&class->lock); | 1402 | spin_lock(&class->lock); |
@@ -1255,8 +1405,10 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) | |||
1255 | if (!first_page) { | 1405 | if (!first_page) { |
1256 | spin_unlock(&class->lock); | 1406 | spin_unlock(&class->lock); |
1257 | first_page = alloc_zspage(class, pool->flags); | 1407 | first_page = alloc_zspage(class, pool->flags); |
1258 | if (unlikely(!first_page)) | 1408 | if (unlikely(!first_page)) { |
1409 | free_handle(pool, handle); | ||
1259 | return 0; | 1410 | return 0; |
1411 | } | ||
1260 | 1412 | ||
1261 | set_zspage_mapping(first_page, class->index, ZS_EMPTY); | 1413 | set_zspage_mapping(first_page, class->index, ZS_EMPTY); |
1262 | atomic_long_add(class->pages_per_zspage, | 1414 | atomic_long_add(class->pages_per_zspage, |
@@ -1267,73 +1419,360 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size) | |||
1267 | class->size, class->pages_per_zspage)); | 1419 | class->size, class->pages_per_zspage)); |
1268 | } | 1420 | } |
1269 | 1421 | ||
1270 | obj = (unsigned long)first_page->freelist; | 1422 | obj = obj_malloc(first_page, class, handle); |
1271 | obj_handle_to_location(obj, &m_page, &m_objidx); | ||
1272 | m_offset = obj_idx_to_offset(m_page, m_objidx, class->size); | ||
1273 | |||
1274 | vaddr = kmap_atomic(m_page); | ||
1275 | link = (struct link_free *)vaddr + m_offset / sizeof(*link); | ||
1276 | first_page->freelist = link->next; | ||
1277 | memset(link, POISON_INUSE, sizeof(*link)); | ||
1278 | kunmap_atomic(vaddr); | ||
1279 | |||
1280 | first_page->inuse++; | ||
1281 | zs_stat_inc(class, OBJ_USED, 1); | ||
1282 | /* Now move the zspage to another fullness group, if required */ | 1423 | /* Now move the zspage to another fullness group, if required */ |
1283 | fix_fullness_group(pool, first_page); | 1424 | fix_fullness_group(class, first_page); |
1425 | record_obj(handle, obj); | ||
1284 | spin_unlock(&class->lock); | 1426 | spin_unlock(&class->lock); |
1285 | 1427 | ||
1286 | return obj; | 1428 | return handle; |
1287 | } | 1429 | } |
1288 | EXPORT_SYMBOL_GPL(zs_malloc); | 1430 | EXPORT_SYMBOL_GPL(zs_malloc); |
1289 | 1431 | ||
1290 | void zs_free(struct zs_pool *pool, unsigned long obj) | 1432 | static void obj_free(struct zs_pool *pool, struct size_class *class, |
1433 | unsigned long obj) | ||
1291 | { | 1434 | { |
1292 | struct link_free *link; | 1435 | struct link_free *link; |
1293 | struct page *first_page, *f_page; | 1436 | struct page *first_page, *f_page; |
1294 | unsigned long f_objidx, f_offset; | 1437 | unsigned long f_objidx, f_offset; |
1295 | void *vaddr; | 1438 | void *vaddr; |
1296 | |||
1297 | int class_idx; | 1439 | int class_idx; |
1298 | struct size_class *class; | ||
1299 | enum fullness_group fullness; | 1440 | enum fullness_group fullness; |
1300 | 1441 | ||
1301 | if (unlikely(!obj)) | 1442 | BUG_ON(!obj); |
1302 | return; | ||
1303 | 1443 | ||
1304 | obj_handle_to_location(obj, &f_page, &f_objidx); | 1444 | obj &= ~OBJ_ALLOCATED_TAG; |
1445 | obj_to_location(obj, &f_page, &f_objidx); | ||
1305 | first_page = get_first_page(f_page); | 1446 | first_page = get_first_page(f_page); |
1306 | 1447 | ||
1307 | get_zspage_mapping(first_page, &class_idx, &fullness); | 1448 | get_zspage_mapping(first_page, &class_idx, &fullness); |
1308 | class = pool->size_class[class_idx]; | ||
1309 | f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); | 1449 | f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); |
1310 | 1450 | ||
1311 | spin_lock(&class->lock); | 1451 | vaddr = kmap_atomic(f_page); |
1312 | 1452 | ||
1313 | /* Insert this object in containing zspage's freelist */ | 1453 | /* Insert this object in containing zspage's freelist */ |
1314 | vaddr = kmap_atomic(f_page); | ||
1315 | link = (struct link_free *)(vaddr + f_offset); | 1454 | link = (struct link_free *)(vaddr + f_offset); |
1316 | link->next = first_page->freelist; | 1455 | link->next = first_page->freelist; |
1456 | if (class->huge) | ||
1457 | set_page_private(first_page, 0); | ||
1317 | kunmap_atomic(vaddr); | 1458 | kunmap_atomic(vaddr); |
1318 | first_page->freelist = (void *)obj; | 1459 | first_page->freelist = (void *)obj; |
1319 | |||
1320 | first_page->inuse--; | 1460 | first_page->inuse--; |
1321 | fullness = fix_fullness_group(pool, first_page); | ||
1322 | |||
1323 | zs_stat_dec(class, OBJ_USED, 1); | 1461 | zs_stat_dec(class, OBJ_USED, 1); |
1324 | if (fullness == ZS_EMPTY) | 1462 | } |
1463 | |||
1464 | void zs_free(struct zs_pool *pool, unsigned long handle) | ||
1465 | { | ||
1466 | struct page *first_page, *f_page; | ||
1467 | unsigned long obj, f_objidx; | ||
1468 | int class_idx; | ||
1469 | struct size_class *class; | ||
1470 | enum fullness_group fullness; | ||
1471 | |||
1472 | if (unlikely(!handle)) | ||
1473 | return; | ||
1474 | |||
1475 | pin_tag(handle); | ||
1476 | obj = handle_to_obj(handle); | ||
1477 | obj_to_location(obj, &f_page, &f_objidx); | ||
1478 | first_page = get_first_page(f_page); | ||
1479 | |||
1480 | get_zspage_mapping(first_page, &class_idx, &fullness); | ||
1481 | class = pool->size_class[class_idx]; | ||
1482 | |||
1483 | spin_lock(&class->lock); | ||
1484 | obj_free(pool, class, obj); | ||
1485 | fullness = fix_fullness_group(class, first_page); | ||
1486 | if (fullness == ZS_EMPTY) { | ||
1325 | zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( | 1487 | zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( |
1326 | class->size, class->pages_per_zspage)); | 1488 | class->size, class->pages_per_zspage)); |
1327 | 1489 | atomic_long_sub(class->pages_per_zspage, | |
1490 | &pool->pages_allocated); | ||
1491 | free_zspage(first_page); | ||
1492 | } | ||
1328 | spin_unlock(&class->lock); | 1493 | spin_unlock(&class->lock); |
1494 | unpin_tag(handle); | ||
1495 | |||
1496 | free_handle(pool, handle); | ||
1497 | } | ||
1498 | EXPORT_SYMBOL_GPL(zs_free); | ||
1499 | |||
1500 | static void zs_object_copy(unsigned long src, unsigned long dst, | ||
1501 | struct size_class *class) | ||
1502 | { | ||
1503 | struct page *s_page, *d_page; | ||
1504 | unsigned long s_objidx, d_objidx; | ||
1505 | unsigned long s_off, d_off; | ||
1506 | void *s_addr, *d_addr; | ||
1507 | int s_size, d_size, size; | ||
1508 | int written = 0; | ||
1509 | |||
1510 | s_size = d_size = class->size; | ||
1511 | |||
1512 | obj_to_location(src, &s_page, &s_objidx); | ||
1513 | obj_to_location(dst, &d_page, &d_objidx); | ||
1514 | |||
1515 | s_off = obj_idx_to_offset(s_page, s_objidx, class->size); | ||
1516 | d_off = obj_idx_to_offset(d_page, d_objidx, class->size); | ||
1517 | |||
1518 | if (s_off + class->size > PAGE_SIZE) | ||
1519 | s_size = PAGE_SIZE - s_off; | ||
1520 | |||
1521 | if (d_off + class->size > PAGE_SIZE) | ||
1522 | d_size = PAGE_SIZE - d_off; | ||
1523 | |||
1524 | s_addr = kmap_atomic(s_page); | ||
1525 | d_addr = kmap_atomic(d_page); | ||
1526 | |||
1527 | while (1) { | ||
1528 | size = min(s_size, d_size); | ||
1529 | memcpy(d_addr + d_off, s_addr + s_off, size); | ||
1530 | written += size; | ||
1531 | |||
1532 | if (written == class->size) | ||
1533 | break; | ||
1534 | |||
1535 | s_off += size; | ||
1536 | s_size -= size; | ||
1537 | d_off += size; | ||
1538 | d_size -= size; | ||
1539 | |||
1540 | if (s_off >= PAGE_SIZE) { | ||
1541 | kunmap_atomic(d_addr); | ||
1542 | kunmap_atomic(s_addr); | ||
1543 | s_page = get_next_page(s_page); | ||
1544 | BUG_ON(!s_page); | ||
1545 | s_addr = kmap_atomic(s_page); | ||
1546 | d_addr = kmap_atomic(d_page); | ||
1547 | s_size = class->size - written; | ||
1548 | s_off = 0; | ||
1549 | } | ||
1550 | |||
1551 | if (d_off >= PAGE_SIZE) { | ||
1552 | kunmap_atomic(d_addr); | ||
1553 | d_page = get_next_page(d_page); | ||
1554 | BUG_ON(!d_page); | ||
1555 | d_addr = kmap_atomic(d_page); | ||
1556 | d_size = class->size - written; | ||
1557 | d_off = 0; | ||
1558 | } | ||
1559 | } | ||
1560 | |||
1561 | kunmap_atomic(d_addr); | ||
1562 | kunmap_atomic(s_addr); | ||
1563 | } | ||
1564 | |||
1565 | /* | ||
1566 | * Find an allocated object in the zspage, starting from the given | ||
1567 | * index, and return its handle. | ||
1568 | */ | ||
1569 | static unsigned long find_alloced_obj(struct page *page, int index, | ||
1570 | struct size_class *class) | ||
1571 | { | ||
1572 | unsigned long head; | ||
1573 | int offset = 0; | ||
1574 | unsigned long handle = 0; | ||
1575 | void *addr = kmap_atomic(page); | ||
1576 | |||
1577 | if (!is_first_page(page)) | ||
1578 | offset = page->index; | ||
1579 | offset += class->size * index; | ||
1580 | |||
1581 | while (offset < PAGE_SIZE) { | ||
1582 | head = obj_to_head(class, page, addr + offset); | ||
1583 | if (head & OBJ_ALLOCATED_TAG) { | ||
1584 | handle = head & ~OBJ_ALLOCATED_TAG; | ||
1585 | if (trypin_tag(handle)) | ||
1586 | break; | ||
1587 | handle = 0; | ||
1588 | } | ||
1589 | |||
1590 | offset += class->size; | ||
1591 | index++; | ||
1592 | } | ||
1593 | |||
1594 | kunmap_atomic(addr); | ||
1595 | return handle; | ||
1596 | } | ||
1597 | |||
1598 | struct zs_compact_control { | ||
1599 | /* Source page for migration which could be a subpage of zspage. */ | ||
1600 | struct page *s_page; | ||
1601 | /* Destination page for migration, which should be the first page | ||
1602 | * of a zspage. */ | ||
1603 | struct page *d_page; | ||
1604 | /* Starting object index within @s_page from which to search for | ||
1605 | * live objects in the subpage. */ | ||
1606 | int index; | ||
1607 | /* number of objects migrated */ | ||
1608 | int nr_migrated; | ||
1609 | }; | ||
1610 | |||
1611 | static int migrate_zspage(struct zs_pool *pool, struct size_class *class, | ||
1612 | struct zs_compact_control *cc) | ||
1613 | { | ||
1614 | unsigned long used_obj, free_obj; | ||
1615 | unsigned long handle; | ||
1616 | struct page *s_page = cc->s_page; | ||
1617 | struct page *d_page = cc->d_page; | ||
1618 | unsigned long index = cc->index; | ||
1619 | int nr_migrated = 0; | ||
1620 | int ret = 0; | ||
1621 | |||
1622 | while (1) { | ||
1623 | handle = find_alloced_obj(s_page, index, class); | ||
1624 | if (!handle) { | ||
1625 | s_page = get_next_page(s_page); | ||
1626 | if (!s_page) | ||
1627 | break; | ||
1628 | index = 0; | ||
1629 | continue; | ||
1630 | } | ||
1631 | |||
1632 | /* Stop if there is no more space */ | ||
1633 | if (zspage_full(d_page)) { | ||
1634 | unpin_tag(handle); | ||
1635 | ret = -ENOMEM; | ||
1636 | break; | ||
1637 | } | ||
1638 | |||
1639 | used_obj = handle_to_obj(handle); | ||
1640 | free_obj = obj_malloc(d_page, class, handle); | ||
1641 | zs_object_copy(used_obj, free_obj, class); | ||
1642 | index++; | ||
1643 | record_obj(handle, free_obj); | ||
1644 | unpin_tag(handle); | ||
1645 | obj_free(pool, class, used_obj); | ||
1646 | nr_migrated++; | ||
1647 | } | ||
1648 | |||
1649 | /* Remember last position in this iteration */ | ||
1650 | cc->s_page = s_page; | ||
1651 | cc->index = index; | ||
1652 | cc->nr_migrated = nr_migrated; | ||
1653 | |||
1654 | return ret; | ||
1655 | } | ||
1656 | |||
1657 | static struct page *alloc_target_page(struct size_class *class) | ||
1658 | { | ||
1659 | int i; | ||
1660 | struct page *page; | ||
1661 | |||
1662 | for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) { | ||
1663 | page = class->fullness_list[i]; | ||
1664 | if (page) { | ||
1665 | remove_zspage(page, class, i); | ||
1666 | break; | ||
1667 | } | ||
1668 | } | ||
1669 | |||
1670 | return page; | ||
1671 | } | ||
1672 | |||
1673 | static void putback_zspage(struct zs_pool *pool, struct size_class *class, | ||
1674 | struct page *first_page) | ||
1675 | { | ||
1676 | enum fullness_group fullness; | ||
1677 | |||
1678 | BUG_ON(!is_first_page(first_page)); | ||
1679 | |||
1680 | fullness = get_fullness_group(first_page); | ||
1681 | insert_zspage(first_page, class, fullness); | ||
1682 | set_zspage_mapping(first_page, class->index, fullness); | ||
1329 | 1683 | ||
1330 | if (fullness == ZS_EMPTY) { | 1684 | if (fullness == ZS_EMPTY) { |
1685 | zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage( | ||
1686 | class->size, class->pages_per_zspage)); | ||
1331 | atomic_long_sub(class->pages_per_zspage, | 1687 | atomic_long_sub(class->pages_per_zspage, |
1332 | &pool->pages_allocated); | 1688 | &pool->pages_allocated); |
1689 | |||
1333 | free_zspage(first_page); | 1690 | free_zspage(first_page); |
1334 | } | 1691 | } |
1335 | } | 1692 | } |
1336 | EXPORT_SYMBOL_GPL(zs_free); | 1693 | |
1694 | static struct page *isolate_source_page(struct size_class *class) | ||
1695 | { | ||
1696 | struct page *page; | ||
1697 | |||
1698 | page = class->fullness_list[ZS_ALMOST_EMPTY]; | ||
1699 | if (page) | ||
1700 | remove_zspage(page, class, ZS_ALMOST_EMPTY); | ||
1701 | |||
1702 | return page; | ||
1703 | } | ||
1704 | |||
1705 | static unsigned long __zs_compact(struct zs_pool *pool, | ||
1706 | struct size_class *class) | ||
1707 | { | ||
1708 | int nr_to_migrate; | ||
1709 | struct zs_compact_control cc; | ||
1710 | struct page *src_page; | ||
1711 | struct page *dst_page = NULL; | ||
1712 | unsigned long nr_total_migrated = 0; | ||
1713 | |||
1714 | spin_lock(&class->lock); | ||
1715 | while ((src_page = isolate_source_page(class))) { | ||
1716 | |||
1717 | BUG_ON(!is_first_page(src_page)); | ||
1718 | |||
1719 | /* The goal is to migrate all live objects in source page */ | ||
1720 | nr_to_migrate = src_page->inuse; | ||
1721 | cc.index = 0; | ||
1722 | cc.s_page = src_page; | ||
1723 | |||
1724 | while ((dst_page = alloc_target_page(class))) { | ||
1725 | cc.d_page = dst_page; | ||
1726 | /* | ||
1727 | * If there is no more space in dst_page, try to | ||
1728 | * allocate another zspage. | ||
1729 | */ | ||
1730 | if (!migrate_zspage(pool, class, &cc)) | ||
1731 | break; | ||
1732 | |||
1733 | putback_zspage(pool, class, dst_page); | ||
1734 | nr_total_migrated += cc.nr_migrated; | ||
1735 | nr_to_migrate -= cc.nr_migrated; | ||
1736 | } | ||
1737 | |||
1738 | /* Stop if we couldn't find slot */ | ||
1739 | if (dst_page == NULL) | ||
1740 | break; | ||
1741 | |||
1742 | putback_zspage(pool, class, dst_page); | ||
1743 | putback_zspage(pool, class, src_page); | ||
1744 | spin_unlock(&class->lock); | ||
1745 | nr_total_migrated += cc.nr_migrated; | ||
1746 | cond_resched(); | ||
1747 | spin_lock(&class->lock); | ||
1748 | } | ||
1749 | |||
1750 | if (src_page) | ||
1751 | putback_zspage(pool, class, src_page); | ||
1752 | |||
1753 | spin_unlock(&class->lock); | ||
1754 | |||
1755 | return nr_total_migrated; | ||
1756 | } | ||
1757 | |||
1758 | unsigned long zs_compact(struct zs_pool *pool) | ||
1759 | { | ||
1760 | int i; | ||
1761 | unsigned long nr_migrated = 0; | ||
1762 | struct size_class *class; | ||
1763 | |||
1764 | for (i = zs_size_classes - 1; i >= 0; i--) { | ||
1765 | class = pool->size_class[i]; | ||
1766 | if (!class) | ||
1767 | continue; | ||
1768 | if (class->index != i) | ||
1769 | continue; | ||
1770 | nr_migrated += __zs_compact(pool, class); | ||
1771 | } | ||
1772 | |||
1773 | return nr_migrated; | ||
1774 | } | ||
1775 | EXPORT_SYMBOL_GPL(zs_compact); | ||
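A sketch of how a pool user such as zram would exercise the API after this patch, including the new zs_compact() entry point (zs_smoke_test() is a hypothetical caller; the GFP flags and sizes are illustrative, and error handling is trimmed):

    static int zs_smoke_test(void)
    {
            struct zs_pool *pool;
            unsigned long handle;
            char *dst;

            pool = zs_create_pool("test", GFP_NOIO | __GFP_HIGHMEM);
            if (!pool)
                    return -ENOMEM;

            /* the handle is opaque; the object header keeps the handle internally */
            handle = zs_malloc(pool, 128);
            if (handle) {
                    dst = zs_map_object(pool, handle, ZS_MM_WR);  /* pins the handle */
                    memset(dst, 0xaa, 128);
                    zs_unmap_object(pool, handle);                /* unpins it again */
                    zs_free(pool, handle);
            }

            pr_info("zs_compact migrated %lu objects\n", zs_compact(pool));
            zs_destroy_pool(pool);
            return 0;
    }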
1337 | 1776 | ||
1338 | /** | 1777 | /** |
1339 | * zs_create_pool - Creates an allocation pool to work from. | 1778 | * zs_create_pool - Creates an allocation pool to work from. |
@@ -1355,20 +1794,20 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags) | |||
1355 | if (!pool) | 1794 | if (!pool) |
1356 | return NULL; | 1795 | return NULL; |
1357 | 1796 | ||
1358 | pool->name = kstrdup(name, GFP_KERNEL); | ||
1359 | if (!pool->name) { | ||
1360 | kfree(pool); | ||
1361 | return NULL; | ||
1362 | } | ||
1363 | |||
1364 | pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), | 1797 | pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), |
1365 | GFP_KERNEL); | 1798 | GFP_KERNEL); |
1366 | if (!pool->size_class) { | 1799 | if (!pool->size_class) { |
1367 | kfree(pool->name); | ||
1368 | kfree(pool); | 1800 | kfree(pool); |
1369 | return NULL; | 1801 | return NULL; |
1370 | } | 1802 | } |
1371 | 1803 | ||
1804 | pool->name = kstrdup(name, GFP_KERNEL); | ||
1805 | if (!pool->name) | ||
1806 | goto err; | ||
1807 | |||
1808 | if (create_handle_cache(pool)) | ||
1809 | goto err; | ||
1810 | |||
1372 | /* | 1811 | /* |
1373 | * Iterate reversly, because, size of size_class that we want to use | 1812 | * Iterate reversly, because, size of size_class that we want to use |
1374 | * for merging should be larger or equal to current size. | 1813 | * for merging should be larger or equal to current size. |
@@ -1406,6 +1845,9 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags) | |||
1406 | class->size = size; | 1845 | class->size = size; |
1407 | class->index = i; | 1846 | class->index = i; |
1408 | class->pages_per_zspage = pages_per_zspage; | 1847 | class->pages_per_zspage = pages_per_zspage; |
1848 | if (pages_per_zspage == 1 && | ||
1849 | get_maxobj_per_zspage(size, pages_per_zspage) == 1) | ||
1850 | class->huge = true; | ||
1409 | spin_lock_init(&class->lock); | 1851 | spin_lock_init(&class->lock); |
1410 | pool->size_class[i] = class; | 1852 | pool->size_class[i] = class; |
1411 | 1853 | ||
@@ -1450,6 +1892,7 @@ void zs_destroy_pool(struct zs_pool *pool) | |||
1450 | kfree(class); | 1892 | kfree(class); |
1451 | } | 1893 | } |
1452 | 1894 | ||
1895 | destroy_handle_cache(pool); | ||
1453 | kfree(pool->size_class); | 1896 | kfree(pool->size_class); |
1454 | kfree(pool->name); | 1897 | kfree(pool->name); |
1455 | kfree(pool); | 1898 | kfree(pool); |