path: root/mm
author	Minchan Kim <minchan@kernel.org>	2015-04-15 19:15:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-04-15 19:35:20 -0400
commit	312fcae227037619dc858c9ccd362c7b847730a2 (patch)
tree	dc20f7720c297cc0426f039c71794e378d885007 /mm
parent	c78062612fb525430b775a0bef4d3cc07e512da0 (diff)
zsmalloc: support compaction
This patch provides the core functions for migration of zsmalloc objects. The migration policy is simple:

for each size class {
        while {
                src_page = get zs_page from ZS_ALMOST_EMPTY
                if (!src_page)
                        break;
                dst_page = get zs_page from ZS_ALMOST_FULL
                if (!dst_page)
                        dst_page = get zs_page from ZS_ALMOST_EMPTY
                if (!dst_page)
                        break;
                migrate(from src_page, to dst_page);
        }
}

For migration, we need to identify which objects in a zspage are allocated so we can migrate them out. We could find them by iterating over the free objects in the zspage, since the first_page of a zspage keeps the free objects in a singly-linked list, but that is not efficient. Instead, this patch adds a tag (OBJ_ALLOCATED_TAG) in the header of each object (i.e. the handle) so we can easily check whether an object is allocated.

This patch also adds another status bit in the handle to synchronize user access through zs_map_object() with migration: during migration we must not move an object the user is currently accessing, to keep the old and new copies of the object coherent.

[akpm@linux-foundation.org: zsmalloc.c needs sched.h for cond_resched()]
Signed-off-by: Minchan Kim <minchan@kernel.org>
Cc: Juneho Choi <juno.choi@lge.com>
Cc: Gunho Lee <gunho.lee@lge.com>
Cc: Luigi Semenzato <semenzato@google.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Seth Jennings <sjennings@variantweb.net>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Jerome Marchand <jmarchan@redhat.com>
Cc: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
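As a quick illustration of the encoding this patch relies on, here is a minimal userspace sketch (not kernel code). The bit widths and the raw-pfn signatures are assumptions chosen for the example: the real location_to_obj()/obj_to_location() in the diff below derive OBJ_INDEX_BITS from _PFN_BITS and take a struct page. The point is the same, though: <pfn, obj_idx> is packed and then shifted left by OBJ_TAG_BITS so the least significant bit stays free for a flag such as OBJ_ALLOCATED_TAG in an object header, or HANDLE_PIN_BIT in a handle.

#include <assert.h>
#include <stdio.h>

/* Illustrative stand-ins; the kernel derives OBJ_INDEX_BITS from _PFN_BITS. */
#define OBJ_TAG_BITS		1
#define OBJ_ALLOCATED_TAG	1UL
#define OBJ_INDEX_BITS		12
#define OBJ_INDEX_MASK		((1UL << OBJ_INDEX_BITS) - 1)

/* Pack <pfn, obj_idx>, then shift left so the low bit is free for a tag. */
static unsigned long location_to_obj(unsigned long pfn, unsigned long obj_idx)
{
	unsigned long obj = (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);

	return obj << OBJ_TAG_BITS;
}

/* Reverse the encoding: drop the tag bit, then split pfn and index. */
static void obj_to_location(unsigned long obj, unsigned long *pfn,
			    unsigned long *obj_idx)
{
	obj >>= OBJ_TAG_BITS;
	*pfn = obj >> OBJ_INDEX_BITS;
	*obj_idx = obj & OBJ_INDEX_MASK;
}

int main(void)
{
	unsigned long pfn, idx;
	unsigned long obj = location_to_obj(0x1234, 7);
	/* An allocated object's header carries the value with the tag set. */
	unsigned long head = obj | OBJ_ALLOCATED_TAG;

	assert(head & OBJ_ALLOCATED_TAG);	/* compaction sees a live object */
	obj_to_location(head & ~OBJ_ALLOCATED_TAG, &pfn, &idx);
	assert(pfn == 0x1234 && idx == 7);
	printf("pfn=%#lx idx=%lu\n", pfn, idx);
	return 0;
}

Keeping the low bit free is what lets find_alloced_obj() distinguish allocated slots and lets pin_tag()/unpin_tag() spin on a bit inside the handle itself, with no extra per-object locking.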
Diffstat (limited to 'mm')
-rw-r--r--  mm/zsmalloc.c  378
1 file changed, 359 insertions(+), 19 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 55b171016f4f..c4ae608dc725 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -78,6 +78,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/sched.h>
 #include <linux/bitops.h>
 #include <linux/errno.h>
 #include <linux/highmem.h>
@@ -135,7 +136,26 @@
 #endif
 #endif
 #define _PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
-#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS)
+
+/*
+ * Memory for allocating for handle keeps object position by
+ * encoding <page, obj_idx> and the encoded value has a room
+ * in least bit(ie, look at obj_to_location).
+ * We use the bit to synchronize between object access by
+ * user and migration.
+ */
+#define HANDLE_PIN_BIT	0
+
+/*
+ * Head in allocated object should have OBJ_ALLOCATED_TAG
+ * to identify the object was allocated or not.
+ * It's okay to add the status bit in the least bit because
+ * header keeps handle which is 4byte-aligned address so we
+ * have room for two bit at least.
+ */
+#define OBJ_ALLOCATED_TAG	1
+#define OBJ_TAG_BITS	1
+#define OBJ_INDEX_BITS	(BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS)
 #define OBJ_INDEX_MASK	((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
 
 #define MAX(a, b) ((a) >= (b) ? (a) : (b))
@@ -610,35 +630,35 @@ static struct page *get_next_page(struct page *page)
 
 /*
  * Encode <page, obj_idx> as a single handle value.
- * On hardware platforms with physical memory starting at 0x0 the pfn
- * could be 0 so we ensure that the handle will never be 0 by adjusting the
- * encoded obj_idx value before encoding.
+ * We use the least bit of handle for tagging.
  */
-static void *obj_location_to_handle(struct page *page, unsigned long obj_idx)
+static void *location_to_obj(struct page *page, unsigned long obj_idx)
 {
-	unsigned long handle;
+	unsigned long obj;
 
 	if (!page) {
 		BUG_ON(obj_idx);
 		return NULL;
 	}
 
-	handle = page_to_pfn(page) << OBJ_INDEX_BITS;
-	handle |= ((obj_idx + 1) & OBJ_INDEX_MASK);
+	obj = page_to_pfn(page) << OBJ_INDEX_BITS;
+	obj |= ((obj_idx) & OBJ_INDEX_MASK);
+	obj <<= OBJ_TAG_BITS;
 
-	return (void *)handle;
+	return (void *)obj;
 }
 
 /*
  * Decode <page, obj_idx> pair from the given object handle. We adjust the
  * decoded obj_idx back to its original value since it was adjusted in
- * obj_location_to_handle().
+ * location_to_obj().
  */
-static void obj_to_location(unsigned long handle, struct page **page,
+static void obj_to_location(unsigned long obj, struct page **page,
 				unsigned long *obj_idx)
 {
-	*page = pfn_to_page(handle >> OBJ_INDEX_BITS);
-	*obj_idx = (handle & OBJ_INDEX_MASK) - 1;
+	obj >>= OBJ_TAG_BITS;
+	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+	*obj_idx = (obj & OBJ_INDEX_MASK);
 }
 
 static unsigned long handle_to_obj(unsigned long handle)
@@ -646,6 +666,11 @@ static unsigned long handle_to_obj(unsigned long handle)
 	return *(unsigned long *)handle;
 }
 
+unsigned long obj_to_head(void *obj)
+{
+	return *(unsigned long *)obj;
+}
+
 static unsigned long obj_idx_to_offset(struct page *page,
 				unsigned long obj_idx, int class_size)
 {
@@ -657,6 +682,25 @@ static unsigned long obj_idx_to_offset(struct page *page,
 	return off + obj_idx * class_size;
 }
 
+static inline int trypin_tag(unsigned long handle)
+{
+	unsigned long *ptr = (unsigned long *)handle;
+
+	return !test_and_set_bit_lock(HANDLE_PIN_BIT, ptr);
+}
+
+static void pin_tag(unsigned long handle)
+{
+	while (!trypin_tag(handle));
+}
+
+static void unpin_tag(unsigned long handle)
+{
+	unsigned long *ptr = (unsigned long *)handle;
+
+	clear_bit_unlock(HANDLE_PIN_BIT, ptr);
+}
+
 static void reset_page(struct page *page)
 {
 	clear_bit(PG_private, &page->flags);
@@ -718,7 +762,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
 		link = (struct link_free *)vaddr + off / sizeof(*link);
 
 		while ((off += class->size) < PAGE_SIZE) {
-			link->next = obj_location_to_handle(page, i++);
+			link->next = location_to_obj(page, i++);
 			link += class->size / sizeof(*link);
 		}
 
@@ -728,7 +772,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
 		 * page (if present)
 		 */
 		next_page = get_next_page(page);
-		link->next = obj_location_to_handle(next_page, 0);
+		link->next = location_to_obj(next_page, 0);
 		kunmap_atomic(vaddr);
 		page = next_page;
 		off %= PAGE_SIZE;
@@ -782,7 +826,7 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 
 	init_zspage(first_page, class);
 
-	first_page->freelist = obj_location_to_handle(first_page, 0);
+	first_page->freelist = location_to_obj(first_page, 0);
 	/* Maximum number of objects we can store in this zspage */
 	first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size;
 
@@ -1017,6 +1061,13 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
 	return true;
 }
 
+static bool zspage_full(struct page *page)
+{
+	BUG_ON(!is_first_page(page));
+
+	return page->inuse == page->objects;
+}
+
 #ifdef CONFIG_ZSMALLOC_STAT
 
 static inline void zs_stat_inc(struct size_class *class,
@@ -1219,6 +1270,9 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	 */
 	BUG_ON(in_interrupt());
 
+	/* From now on, migration cannot move the object */
+	pin_tag(handle);
+
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &page, &obj_idx);
 	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
@@ -1276,6 +1330,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 		__zs_unmap_object(area, pages, off, class->size);
 	}
 	put_cpu_var(zs_map_area);
+	unpin_tag(handle);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1289,6 +1344,7 @@ static unsigned long obj_malloc(struct page *first_page,
 	unsigned long m_objidx, m_offset;
 	void *vaddr;
 
+	handle |= OBJ_ALLOCATED_TAG;
 	obj = (unsigned long)first_page->freelist;
 	obj_to_location(obj, &m_page, &m_objidx);
 	m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
@@ -1374,6 +1430,7 @@ static void obj_free(struct zs_pool *pool, struct size_class *class,
 
 	BUG_ON(!obj);
 
+	obj &= ~OBJ_ALLOCATED_TAG;
 	obj_to_location(obj, &f_page, &f_objidx);
 	first_page = get_first_page(f_page);
 
@@ -1402,8 +1459,8 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	if (unlikely(!handle))
 		return;
 
+	pin_tag(handle);
 	obj = handle_to_obj(handle);
-	free_handle(pool, handle);
 	obj_to_location(obj, &f_page, &f_objidx);
 	first_page = get_first_page(f_page);
 
@@ -1413,18 +1470,301 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	spin_lock(&class->lock);
 	obj_free(pool, class, obj);
 	fullness = fix_fullness_group(class, first_page);
-	if (fullness == ZS_EMPTY)
+	if (fullness == ZS_EMPTY) {
 		zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
 				class->size, class->pages_per_zspage));
+		atomic_long_sub(class->pages_per_zspage,
+				&pool->pages_allocated);
+		free_zspage(first_page);
+	}
 	spin_unlock(&class->lock);
+	unpin_tag(handle);
+
+	free_handle(pool, handle);
+}
+EXPORT_SYMBOL_GPL(zs_free);
+
+static void zs_object_copy(unsigned long src, unsigned long dst,
+				struct size_class *class)
+{
+	struct page *s_page, *d_page;
+	unsigned long s_objidx, d_objidx;
+	unsigned long s_off, d_off;
+	void *s_addr, *d_addr;
+	int s_size, d_size, size;
+	int written = 0;
+
+	s_size = d_size = class->size;
+
+	obj_to_location(src, &s_page, &s_objidx);
+	obj_to_location(dst, &d_page, &d_objidx);
+
+	s_off = obj_idx_to_offset(s_page, s_objidx, class->size);
+	d_off = obj_idx_to_offset(d_page, d_objidx, class->size);
+
+	if (s_off + class->size > PAGE_SIZE)
+		s_size = PAGE_SIZE - s_off;
+
+	if (d_off + class->size > PAGE_SIZE)
+		d_size = PAGE_SIZE - d_off;
+
+	s_addr = kmap_atomic(s_page);
+	d_addr = kmap_atomic(d_page);
+
+	while (1) {
+		size = min(s_size, d_size);
+		memcpy(d_addr + d_off, s_addr + s_off, size);
+		written += size;
+
+		if (written == class->size)
+			break;
+
+		if (s_off + size >= PAGE_SIZE) {
+			kunmap_atomic(d_addr);
+			kunmap_atomic(s_addr);
+			s_page = get_next_page(s_page);
+			BUG_ON(!s_page);
+			s_addr = kmap_atomic(s_page);
+			d_addr = kmap_atomic(d_page);
+			s_size = class->size - written;
+			s_off = 0;
+		} else {
+			s_off += size;
+			s_size -= size;
+		}
+
+		if (d_off + size >= PAGE_SIZE) {
+			kunmap_atomic(d_addr);
+			d_page = get_next_page(d_page);
+			BUG_ON(!d_page);
+			d_addr = kmap_atomic(d_page);
+			d_size = class->size - written;
+			d_off = 0;
+		} else {
+			d_off += size;
+			d_size -= size;
+		}
+	}
+
+	kunmap_atomic(d_addr);
+	kunmap_atomic(s_addr);
+}
+
+/*
+ * Find alloced object in zspage from index object and
+ * return handle.
+ */
+static unsigned long find_alloced_obj(struct page *page, int index,
+					struct size_class *class)
+{
+	unsigned long head;
+	int offset = 0;
+	unsigned long handle = 0;
+	void *addr = kmap_atomic(page);
+
+	if (!is_first_page(page))
+		offset = page->index;
+	offset += class->size * index;
+
+	while (offset < PAGE_SIZE) {
+		head = obj_to_head(addr + offset);
+		if (head & OBJ_ALLOCATED_TAG) {
+			handle = head & ~OBJ_ALLOCATED_TAG;
+			if (trypin_tag(handle))
+				break;
+			handle = 0;
+		}
+
+		offset += class->size;
+		index++;
+	}
+
+	kunmap_atomic(addr);
+	return handle;
+}
+
+struct zs_compact_control {
+	/* Source page for migration which could be a subpage of zspage. */
+	struct page *s_page;
+	/* Destination page for migration which should be a first page
+	 * of zspage. */
+	struct page *d_page;
+	/* Starting object index within @s_page which used for live object
+	 * in the subpage. */
+	int index;
+	/* how many of objects are migrated */
+	int nr_migrated;
+};
+
+static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
+				struct zs_compact_control *cc)
+{
+	unsigned long used_obj, free_obj;
+	unsigned long handle;
+	struct page *s_page = cc->s_page;
+	struct page *d_page = cc->d_page;
+	unsigned long index = cc->index;
+	int nr_migrated = 0;
+	int ret = 0;
+
+	while (1) {
+		handle = find_alloced_obj(s_page, index, class);
+		if (!handle) {
+			s_page = get_next_page(s_page);
+			if (!s_page)
+				break;
+			index = 0;
+			continue;
+		}
+
+		/* Stop if there is no more space */
+		if (zspage_full(d_page)) {
+			unpin_tag(handle);
+			ret = -ENOMEM;
+			break;
+		}
 
+		used_obj = handle_to_obj(handle);
+		free_obj = obj_malloc(d_page, class, handle);
+		zs_object_copy(used_obj, free_obj, class);
+		index++;
+		record_obj(handle, free_obj);
+		unpin_tag(handle);
+		obj_free(pool, class, used_obj);
+		nr_migrated++;
+	}
+
+	/* Remember last position in this iteration */
+	cc->s_page = s_page;
+	cc->index = index;
+	cc->nr_migrated = nr_migrated;
+
+	return ret;
+}
+
+static struct page *alloc_target_page(struct size_class *class)
+{
+	int i;
+	struct page *page;
+
+	for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) {
+		page = class->fullness_list[i];
+		if (page) {
+			remove_zspage(page, class, i);
+			break;
+		}
+	}
+
+	return page;
+}
+
+static void putback_zspage(struct zs_pool *pool, struct size_class *class,
+				struct page *first_page)
+{
+	int class_idx;
+	enum fullness_group fullness;
+
+	BUG_ON(!is_first_page(first_page));
+
+	get_zspage_mapping(first_page, &class_idx, &fullness);
+	insert_zspage(first_page, class, fullness);
+	fullness = fix_fullness_group(class, first_page);
 	if (fullness == ZS_EMPTY) {
+		zs_stat_dec(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
+			class->size, class->pages_per_zspage));
 		atomic_long_sub(class->pages_per_zspage,
 				&pool->pages_allocated);
+
 		free_zspage(first_page);
 	}
 }
-EXPORT_SYMBOL_GPL(zs_free);
+
+static struct page *isolate_source_page(struct size_class *class)
+{
+	struct page *page;
+
+	page = class->fullness_list[ZS_ALMOST_EMPTY];
+	if (page)
+		remove_zspage(page, class, ZS_ALMOST_EMPTY);
+
+	return page;
+}
+
+static unsigned long __zs_compact(struct zs_pool *pool,
+				struct size_class *class)
+{
+	int nr_to_migrate;
+	struct zs_compact_control cc;
+	struct page *src_page;
+	struct page *dst_page = NULL;
+	unsigned long nr_total_migrated = 0;
+
+	cond_resched();
+
+	spin_lock(&class->lock);
+	while ((src_page = isolate_source_page(class))) {
+
+		BUG_ON(!is_first_page(src_page));
+
+		/* The goal is to migrate all live objects in source page */
+		nr_to_migrate = src_page->inuse;
+		cc.index = 0;
+		cc.s_page = src_page;
+
+		while ((dst_page = alloc_target_page(class))) {
+			cc.d_page = dst_page;
+			/*
+			 * If there is no more space in dst_page, try to
+			 * allocate another zspage.
+			 */
+			if (!migrate_zspage(pool, class, &cc))
+				break;
+
+			putback_zspage(pool, class, dst_page);
+			nr_total_migrated += cc.nr_migrated;
+			nr_to_migrate -= cc.nr_migrated;
+		}
+
+		/* Stop if we couldn't find slot */
+		if (dst_page == NULL)
+			break;
+
+		putback_zspage(pool, class, dst_page);
+		putback_zspage(pool, class, src_page);
+		spin_unlock(&class->lock);
+		nr_total_migrated += cc.nr_migrated;
+		cond_resched();
+		spin_lock(&class->lock);
+	}
+
+	if (src_page)
+		putback_zspage(pool, class, src_page);
+
+	spin_unlock(&class->lock);
+
+	return nr_total_migrated;
+}
+
+unsigned long zs_compact(struct zs_pool *pool)
+{
+	int i;
+	unsigned long nr_migrated = 0;
+	struct size_class *class;
+
+	for (i = zs_size_classes - 1; i >= 0; i--) {
+		class = pool->size_class[i];
+		if (!class)
+			continue;
+		if (class->index != i)
+			continue;
+		nr_migrated += __zs_compact(pool, class);
+	}
+
+	synchronize_rcu();
+
+	return nr_migrated;
+}
+EXPORT_SYMBOL_GPL(zs_compact);
 
 /**
  * zs_create_pool - Creates an allocation pool to work from.