Diffstat (limited to 'mm')
-rw-r--r--	mm/slab.c	571
1 file changed, 217 insertions, 354 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 2580db062df9..a983e3084332 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -164,72 +164,6 @@
164static bool pfmemalloc_active __read_mostly; 164static bool pfmemalloc_active __read_mostly;
165 165
166/* 166/*
167 * kmem_bufctl_t:
168 *
169 * Bufctl's are used for linking objs within a slab
170 * linked offsets.
171 *
172 * This implementation relies on "struct page" for locating the cache &
173 * slab an object belongs to.
174 * This allows the bufctl structure to be small (one int), but limits
175 * the number of objects a slab (not a cache) can contain when off-slab
176 * bufctls are used. The limit is the size of the largest general cache
177 * that does not use off-slab slabs.
178 * For 32bit archs with 4 kB pages, is this 56.
179 * This is not serious, as it is only for large objects, when it is unwise
180 * to have too many per slab.
181 * Note: This limit can be raised by introducing a general cache whose size
182 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
183 */
184
185typedef unsigned int kmem_bufctl_t;
186#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
187#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
188#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
189#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
190
191/*
192 * struct slab_rcu
193 *
194 * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
195 * arrange for kmem_freepages to be called via RCU. This is useful if
196 * we need to approach a kernel structure obliquely, from its address
197 * obtained without the usual locking. We can lock the structure to
198 * stabilize it and check it's still at the given address, only if we
199 * can be sure that the memory has not been meanwhile reused for some
200 * other kind of object (which our subsystem's lock might corrupt).
201 *
202 * rcu_read_lock before reading the address, then rcu_read_unlock after
203 * taking the spinlock within the structure expected at that address.
204 */
205struct slab_rcu {
206 struct rcu_head head;
207 struct kmem_cache *cachep;
208 void *addr;
209};
210
211/*
212 * struct slab
213 *
214 * Manages the objs in a slab. Placed either at the beginning of mem allocated
215 * for a slab, or allocated from an general cache.
216 * Slabs are chained into three list: fully used, partial, fully free slabs.
217 */
218struct slab {
219 union {
220 struct {
221 struct list_head list;
222 unsigned long colouroff;
223 void *s_mem; /* including colour offset */
224 unsigned int inuse; /* num of objs active in slab */
225 kmem_bufctl_t free;
226 unsigned short nodeid;
227 };
228 struct slab_rcu __slab_cover_slab_rcu;
229 };
230};
231
232/*
233 * struct array_cache 167 * struct array_cache
234 * 168 *
235 * Purpose: 169 * Purpose:
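/*
 * Illustrative sketch, not part of the patch: a minimal userspace model of
 * the kmem_bufctl_t scheme that the removed comment above describes. Each
 * slot of the bufctl array holds the index of the next free object and
 * BUFCTL_END terminates the chain, as the removed cache_init_objs() set it
 * up. The replacement (a plain array of free indices plus page->active) is
 * sketched further down. NUM_OBJS and all names here are illustrative only.
 */
#include <assert.h>

#define NUM_OBJS   8
#define BUFCTL_END ((unsigned int)~0U)

struct toy_bufctl_slab {
	unsigned int free;               /* index of first free object */
	unsigned int bufctl[NUM_OBJS];   /* bufctl[i] = index of next free object */
};

static void bufctl_init(struct toy_bufctl_slab *s)
{
	unsigned int i;

	for (i = 0; i < NUM_OBJS; i++)   /* old cache_init_objs(): bufctl[i] = i + 1 */
		s->bufctl[i] = i + 1;
	s->bufctl[NUM_OBJS - 1] = BUFCTL_END;
	s->free = 0;
}

static unsigned int bufctl_get(struct toy_bufctl_slab *s)
{
	unsigned int idx = s->free;

	assert(idx != BUFCTL_END);
	s->free = s->bufctl[idx];        /* follow the chain, like the old slab_get_obj() */
	return idx;
}

static void bufctl_put(struct toy_bufctl_slab *s, unsigned int idx)
{
	s->bufctl[idx] = s->free;        /* push back on the chain, like the old slab_put_obj() */
	s->free = idx;
}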
@@ -456,18 +390,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
456 return page->slab_cache; 390 return page->slab_cache;
457} 391}
458 392
459static inline struct slab *virt_to_slab(const void *obj) 393static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
460{
461 struct page *page = virt_to_head_page(obj);
462
463 VM_BUG_ON(!PageSlab(page));
464 return page->slab_page;
465}
466
467static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
468 unsigned int idx) 394 unsigned int idx)
469{ 395{
470 return slab->s_mem + cache->size * idx; 396 return page->s_mem + cache->size * idx;
471} 397}
472 398
473/* 399/*
@@ -477,9 +403,9 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
477 * reciprocal_divide(offset, cache->reciprocal_buffer_size) 403 * reciprocal_divide(offset, cache->reciprocal_buffer_size)
478 */ 404 */
479static inline unsigned int obj_to_index(const struct kmem_cache *cache, 405static inline unsigned int obj_to_index(const struct kmem_cache *cache,
480 const struct slab *slab, void *obj) 406 const struct page *page, void *obj)
481{ 407{
482 u32 offset = (obj - slab->s_mem); 408 u32 offset = (obj - page->s_mem);
483 return reciprocal_divide(offset, cache->reciprocal_buffer_size); 409 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
484} 410}
485 411
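/*
 * Illustrative sketch, not part of the patch: the index <-> address mapping
 * used by index_to_obj()/obj_to_index() above, modeled in userspace. The
 * kernel avoids a runtime division by precomputing a fixed-point reciprocal
 * of the object size (cache->reciprocal_buffer_size); recip()/recip_divide()
 * below only mimic what reciprocal_value()/reciprocal_divide() compute and
 * are not the kernel helpers themselves. The sizes are example values.
 */
#include <stdint.h>
#include <assert.h>

static uint32_t recip(uint32_t d)                     /* ~ reciprocal_value() */
{
	return (uint32_t)(((1ULL << 32) + d - 1) / d);
}

static uint32_t recip_divide(uint32_t a, uint32_t r)  /* ~ reciprocal_divide() */
{
	return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
	char s_mem[4096];                 /* stands in for page->s_mem */
	uint32_t size = 192;              /* cache->size */
	uint32_t r = recip(size);
	unsigned int idx;

	for (idx = 0; idx < 4096 / size; idx++) {
		char *obj = s_mem + size * idx;            /* index_to_obj() */
		uint32_t off = (uint32_t)(obj - s_mem);
		assert(recip_divide(off, r) == idx);       /* obj_to_index() */
	}
	return 0;
}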
@@ -641,7 +567,7 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
641 567
642static size_t slab_mgmt_size(size_t nr_objs, size_t align) 568static size_t slab_mgmt_size(size_t nr_objs, size_t align)
643{ 569{
644 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 570 return ALIGN(nr_objs * sizeof(unsigned int), align);
645} 571}
646 572
647/* 573/*
@@ -660,8 +586,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
660 * on it. For the latter case, the memory allocated for a 586 * on it. For the latter case, the memory allocated for a
661 * slab is used for: 587 * slab is used for:
662 * 588 *
663 * - The struct slab 589 * - One unsigned int for each object
664 * - One kmem_bufctl_t for each object
665 * - Padding to respect alignment of @align 590 * - Padding to respect alignment of @align
666 * - @buffer_size bytes for each object 591 * - @buffer_size bytes for each object
667 * 592 *
@@ -674,8 +599,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
674 mgmt_size = 0; 599 mgmt_size = 0;
675 nr_objs = slab_size / buffer_size; 600 nr_objs = slab_size / buffer_size;
676 601
677 if (nr_objs > SLAB_LIMIT)
678 nr_objs = SLAB_LIMIT;
679 } else { 602 } else {
680 /* 603 /*
681 * Ignore padding for the initial guess. The padding 604 * Ignore padding for the initial guess. The padding
@@ -685,8 +608,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
685 * into the memory allocation when taking the padding 608 * into the memory allocation when taking the padding
686 * into account. 609 * into account.
687 */ 610 */
688 nr_objs = (slab_size - sizeof(struct slab)) / 611 nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int));
689 (buffer_size + sizeof(kmem_bufctl_t));
690 612
691 /* 613 /*
692 * This calculated number will be either the right 614 * This calculated number will be either the right
@@ -696,9 +618,6 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
696 > slab_size) 618 > slab_size)
697 nr_objs--; 619 nr_objs--;
698 620
699 if (nr_objs > SLAB_LIMIT)
700 nr_objs = SLAB_LIMIT;
701
702 mgmt_size = slab_mgmt_size(nr_objs, align); 621 mgmt_size = slab_mgmt_size(nr_objs, align);
703 } 622 }
704 *num = nr_objs; 623 *num = nr_objs;
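/*
 * Illustrative sketch, not part of the patch: the on-slab estimate that
 * cache_estimate() performs after this change, written out as a standalone
 * calculation. One unsigned int of freelist space is charged per object
 * instead of the old struct slab + kmem_bufctl_t overhead. The page size,
 * object size and alignment below are example values only.
 */
#include <stdio.h>
#include <stddef.h>

#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((size_t)(a) - 1))

static size_t mgmt_size(size_t nr_objs, size_t align)   /* ~ slab_mgmt_size() */
{
	return ALIGN_UP(nr_objs * sizeof(unsigned int), align);
}

int main(void)
{
	size_t slab_size = 4096;          /* one order-0 page */
	size_t buffer_size = 256;         /* object size */
	size_t align = 8;
	size_t nr_objs;

	/* initial guess ignores the alignment padding of the freelist */
	nr_objs = slab_size / (buffer_size + sizeof(unsigned int));

	/* back off by one if the padded freelist no longer fits */
	if (mgmt_size(nr_objs, align) + nr_objs * buffer_size > slab_size)
		nr_objs--;

	printf("objects per slab: %zu, freelist bytes: %zu, left over: %zu\n",
	       nr_objs, mgmt_size(nr_objs, align),
	       slab_size - mgmt_size(nr_objs, align) - nr_objs * buffer_size);
	return 0;
}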
@@ -829,10 +748,8 @@ static struct array_cache *alloc_arraycache(int node, int entries,
829 return nc; 748 return nc;
830} 749}
831 750
832static inline bool is_slab_pfmemalloc(struct slab *slabp) 751static inline bool is_slab_pfmemalloc(struct page *page)
833{ 752{
834 struct page *page = virt_to_page(slabp->s_mem);
835
836 return PageSlabPfmemalloc(page); 753 return PageSlabPfmemalloc(page);
837} 754}
838 755
@@ -841,23 +758,23 @@ static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
841 struct array_cache *ac) 758 struct array_cache *ac)
842{ 759{
843 struct kmem_cache_node *n = cachep->node[numa_mem_id()]; 760 struct kmem_cache_node *n = cachep->node[numa_mem_id()];
844 struct slab *slabp; 761 struct page *page;
845 unsigned long flags; 762 unsigned long flags;
846 763
847 if (!pfmemalloc_active) 764 if (!pfmemalloc_active)
848 return; 765 return;
849 766
850 spin_lock_irqsave(&n->list_lock, flags); 767 spin_lock_irqsave(&n->list_lock, flags);
851 list_for_each_entry(slabp, &n->slabs_full, list) 768 list_for_each_entry(page, &n->slabs_full, lru)
852 if (is_slab_pfmemalloc(slabp)) 769 if (is_slab_pfmemalloc(page))
853 goto out; 770 goto out;
854 771
855 list_for_each_entry(slabp, &n->slabs_partial, list) 772 list_for_each_entry(page, &n->slabs_partial, lru)
856 if (is_slab_pfmemalloc(slabp)) 773 if (is_slab_pfmemalloc(page))
857 goto out; 774 goto out;
858 775
859 list_for_each_entry(slabp, &n->slabs_free, list) 776 list_for_each_entry(page, &n->slabs_free, lru)
860 if (is_slab_pfmemalloc(slabp)) 777 if (is_slab_pfmemalloc(page))
861 goto out; 778 goto out;
862 779
863 pfmemalloc_active = false; 780 pfmemalloc_active = false;
@@ -897,8 +814,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
897 */ 814 */
898 n = cachep->node[numa_mem_id()]; 815 n = cachep->node[numa_mem_id()];
899 if (!list_empty(&n->slabs_free) && force_refill) { 816 if (!list_empty(&n->slabs_free) && force_refill) {
900 struct slab *slabp = virt_to_slab(objp); 817 struct page *page = virt_to_head_page(objp);
901 ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); 818 ClearPageSlabPfmemalloc(page);
902 clear_obj_pfmemalloc(&objp); 819 clear_obj_pfmemalloc(&objp);
903 recheck_pfmemalloc_active(cachep, ac); 820 recheck_pfmemalloc_active(cachep, ac);
904 return objp; 821 return objp;
@@ -1099,8 +1016,7 @@ static void drain_alien_cache(struct kmem_cache *cachep,
1099 1016
1100static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) 1017static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1101{ 1018{
1102 struct slab *slabp = virt_to_slab(objp); 1019 int nodeid = page_to_nid(virt_to_page(objp));
1103 int nodeid = slabp->nodeid;
1104 struct kmem_cache_node *n; 1020 struct kmem_cache_node *n;
1105 struct array_cache *alien = NULL; 1021 struct array_cache *alien = NULL;
1106 int node; 1022 int node;
@@ -1111,7 +1027,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1111 * Make sure we are not freeing a object from another node to the array 1027 * Make sure we are not freeing a object from another node to the array
1112 * cache on this cpu. 1028 * cache on this cpu.
1113 */ 1029 */
1114 if (likely(slabp->nodeid == node)) 1030 if (likely(nodeid == node))
1115 return 0; 1031 return 0;
1116 1032
1117 n = cachep->node[node]; 1033 n = cachep->node[node];
@@ -1512,6 +1428,8 @@ void __init kmem_cache_init(void)
1512{ 1428{
1513 int i; 1429 int i;
1514 1430
1431 BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
1432 sizeof(struct rcu_head));
1515 kmem_cache = &kmem_cache_boot; 1433 kmem_cache = &kmem_cache_boot;
1516 setup_node_pointer(kmem_cache); 1434 setup_node_pointer(kmem_cache);
1517 1435
@@ -1687,7 +1605,7 @@ static noinline void
1687slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) 1605slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1688{ 1606{
1689 struct kmem_cache_node *n; 1607 struct kmem_cache_node *n;
1690 struct slab *slabp; 1608 struct page *page;
1691 unsigned long flags; 1609 unsigned long flags;
1692 int node; 1610 int node;
1693 1611
@@ -1706,15 +1624,15 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1706 continue; 1624 continue;
1707 1625
1708 spin_lock_irqsave(&n->list_lock, flags); 1626 spin_lock_irqsave(&n->list_lock, flags);
1709 list_for_each_entry(slabp, &n->slabs_full, list) { 1627 list_for_each_entry(page, &n->slabs_full, lru) {
1710 active_objs += cachep->num; 1628 active_objs += cachep->num;
1711 active_slabs++; 1629 active_slabs++;
1712 } 1630 }
1713 list_for_each_entry(slabp, &n->slabs_partial, list) { 1631 list_for_each_entry(page, &n->slabs_partial, lru) {
1714 active_objs += slabp->inuse; 1632 active_objs += page->active;
1715 active_slabs++; 1633 active_slabs++;
1716 } 1634 }
1717 list_for_each_entry(slabp, &n->slabs_free, list) 1635 list_for_each_entry(page, &n->slabs_free, lru)
1718 num_slabs++; 1636 num_slabs++;
1719 1637
1720 free_objects += n->free_objects; 1638 free_objects += n->free_objects;
@@ -1736,19 +1654,11 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1736 * did not request dmaable memory, we might get it, but that 1654 * did not request dmaable memory, we might get it, but that
1737 * would be relatively rare and ignorable. 1655 * would be relatively rare and ignorable.
1738 */ 1656 */
1739static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) 1657static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
1658 int nodeid)
1740{ 1659{
1741 struct page *page; 1660 struct page *page;
1742 int nr_pages; 1661 int nr_pages;
1743 int i;
1744
1745#ifndef CONFIG_MMU
1746 /*
1747 * Nommu uses slab's for process anonymous memory allocations, and thus
1748 * requires __GFP_COMP to properly refcount higher order allocations
1749 */
1750 flags |= __GFP_COMP;
1751#endif
1752 1662
1753 flags |= cachep->allocflags; 1663 flags |= cachep->allocflags;
1754 if (cachep->flags & SLAB_RECLAIM_ACCOUNT) 1664 if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1772,12 +1682,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1772 else 1682 else
1773 add_zone_page_state(page_zone(page), 1683 add_zone_page_state(page_zone(page),
1774 NR_SLAB_UNRECLAIMABLE, nr_pages); 1684 NR_SLAB_UNRECLAIMABLE, nr_pages);
1775 for (i = 0; i < nr_pages; i++) { 1685 __SetPageSlab(page);
1776 __SetPageSlab(page + i); 1686 if (page->pfmemalloc)
1777 1687 SetPageSlabPfmemalloc(page);
1778 if (page->pfmemalloc)
1779 SetPageSlabPfmemalloc(page + i);
1780 }
1781 memcg_bind_pages(cachep, cachep->gfporder); 1688 memcg_bind_pages(cachep, cachep->gfporder);
1782 1689
1783 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { 1690 if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
@@ -1789,17 +1696,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1789 kmemcheck_mark_unallocated_pages(page, nr_pages); 1696 kmemcheck_mark_unallocated_pages(page, nr_pages);
1790 } 1697 }
1791 1698
1792 return page_address(page); 1699 return page;
1793} 1700}
1794 1701
1795/* 1702/*
1796 * Interface to system's page release. 1703 * Interface to system's page release.
1797 */ 1704 */
1798static void kmem_freepages(struct kmem_cache *cachep, void *addr) 1705static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
1799{ 1706{
1800 unsigned long i = (1 << cachep->gfporder); 1707 const unsigned long nr_freed = (1 << cachep->gfporder);
1801 struct page *page = virt_to_page(addr);
1802 const unsigned long nr_freed = i;
1803 1708
1804 kmemcheck_free_shadow(page, cachep->gfporder); 1709 kmemcheck_free_shadow(page, cachep->gfporder);
1805 1710
@@ -1809,27 +1714,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1809 else 1714 else
1810 sub_zone_page_state(page_zone(page), 1715 sub_zone_page_state(page_zone(page),
1811 NR_SLAB_UNRECLAIMABLE, nr_freed); 1716 NR_SLAB_UNRECLAIMABLE, nr_freed);
1812 while (i--) { 1717
1813 BUG_ON(!PageSlab(page)); 1718 BUG_ON(!PageSlab(page));
1814 __ClearPageSlabPfmemalloc(page); 1719 __ClearPageSlabPfmemalloc(page);
1815 __ClearPageSlab(page); 1720 __ClearPageSlab(page);
1816 page++; 1721 page_mapcount_reset(page);
1817 } 1722 page->mapping = NULL;
1818 1723
1819 memcg_release_pages(cachep, cachep->gfporder); 1724 memcg_release_pages(cachep, cachep->gfporder);
1820 if (current->reclaim_state) 1725 if (current->reclaim_state)
1821 current->reclaim_state->reclaimed_slab += nr_freed; 1726 current->reclaim_state->reclaimed_slab += nr_freed;
1822 free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder); 1727 __free_memcg_kmem_pages(page, cachep->gfporder);
1823} 1728}
1824 1729
1825static void kmem_rcu_free(struct rcu_head *head) 1730static void kmem_rcu_free(struct rcu_head *head)
1826{ 1731{
1827 struct slab_rcu *slab_rcu = (struct slab_rcu *)head; 1732 struct kmem_cache *cachep;
1828 struct kmem_cache *cachep = slab_rcu->cachep; 1733 struct page *page;
1829 1734
1830 kmem_freepages(cachep, slab_rcu->addr); 1735 page = container_of(head, struct page, rcu_head);
1831 if (OFF_SLAB(cachep)) 1736 cachep = page->slab_cache;
1832 kmem_cache_free(cachep->slabp_cache, slab_rcu); 1737
1738 kmem_freepages(cachep, page);
1833} 1739}
1834 1740
1835#if DEBUG 1741#if DEBUG
@@ -1978,19 +1884,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1978 /* Print some data about the neighboring objects, if they 1884 /* Print some data about the neighboring objects, if they
1979 * exist: 1885 * exist:
1980 */ 1886 */
1981 struct slab *slabp = virt_to_slab(objp); 1887 struct page *page = virt_to_head_page(objp);
1982 unsigned int objnr; 1888 unsigned int objnr;
1983 1889
1984 objnr = obj_to_index(cachep, slabp, objp); 1890 objnr = obj_to_index(cachep, page, objp);
1985 if (objnr) { 1891 if (objnr) {
1986 objp = index_to_obj(cachep, slabp, objnr - 1); 1892 objp = index_to_obj(cachep, page, objnr - 1);
1987 realobj = (char *)objp + obj_offset(cachep); 1893 realobj = (char *)objp + obj_offset(cachep);
1988 printk(KERN_ERR "Prev obj: start=%p, len=%d\n", 1894 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1989 realobj, size); 1895 realobj, size);
1990 print_objinfo(cachep, objp, 2); 1896 print_objinfo(cachep, objp, 2);
1991 } 1897 }
1992 if (objnr + 1 < cachep->num) { 1898 if (objnr + 1 < cachep->num) {
1993 objp = index_to_obj(cachep, slabp, objnr + 1); 1899 objp = index_to_obj(cachep, page, objnr + 1);
1994 realobj = (char *)objp + obj_offset(cachep); 1900 realobj = (char *)objp + obj_offset(cachep);
1995 printk(KERN_ERR "Next obj: start=%p, len=%d\n", 1901 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1996 realobj, size); 1902 realobj, size);
@@ -2001,11 +1907,12 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
2001#endif 1907#endif
2002 1908
2003#if DEBUG 1909#if DEBUG
2004static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) 1910static void slab_destroy_debugcheck(struct kmem_cache *cachep,
1911 struct page *page)
2005{ 1912{
2006 int i; 1913 int i;
2007 for (i = 0; i < cachep->num; i++) { 1914 for (i = 0; i < cachep->num; i++) {
2008 void *objp = index_to_obj(cachep, slabp, i); 1915 void *objp = index_to_obj(cachep, page, i);
2009 1916
2010 if (cachep->flags & SLAB_POISON) { 1917 if (cachep->flags & SLAB_POISON) {
2011#ifdef CONFIG_DEBUG_PAGEALLOC 1918#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -2030,7 +1937,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
2030 } 1937 }
2031} 1938}
2032#else 1939#else
2033static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp) 1940static void slab_destroy_debugcheck(struct kmem_cache *cachep,
1941 struct page *page)
2034{ 1942{
2035} 1943}
2036#endif 1944#endif
@@ -2044,23 +1952,34 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
2044 * Before calling the slab must have been unlinked from the cache. The 1952 * Before calling the slab must have been unlinked from the cache. The
2045 * cache-lock is not held/needed. 1953 * cache-lock is not held/needed.
2046 */ 1954 */
2047static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) 1955static void slab_destroy(struct kmem_cache *cachep, struct page *page)
2048{ 1956{
2049 void *addr = slabp->s_mem - slabp->colouroff; 1957 void *freelist;
2050 1958
2051 slab_destroy_debugcheck(cachep, slabp); 1959 freelist = page->freelist;
1960 slab_destroy_debugcheck(cachep, page);
2052 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) { 1961 if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
2053 struct slab_rcu *slab_rcu; 1962 struct rcu_head *head;
1963
1964 /*
1965 * RCU free overloads the RCU head over the LRU.
 1966 * slab_page has been overloaded over the LRU,
1967 * however it is not used from now on so that
1968 * we can use it safely.
1969 */
1970 head = (void *)&page->rcu_head;
1971 call_rcu(head, kmem_rcu_free);
2054 1972
2055 slab_rcu = (struct slab_rcu *)slabp;
2056 slab_rcu->cachep = cachep;
2057 slab_rcu->addr = addr;
2058 call_rcu(&slab_rcu->head, kmem_rcu_free);
2059 } else { 1973 } else {
2060 kmem_freepages(cachep, addr); 1974 kmem_freepages(cachep, page);
2061 if (OFF_SLAB(cachep))
2062 kmem_cache_free(cachep->slabp_cache, slabp);
2063 } 1975 }
1976
1977 /*
1978 * From now on, we don't use freelist
1979 * although actual page can be freed in rcu context
1980 */
1981 if (OFF_SLAB(cachep))
1982 kmem_cache_free(cachep->freelist_cache, freelist);
2064} 1983}
2065 1984
2066/** 1985/**
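/*
 * Illustrative sketch, not part of the patch: the storage overloading that
 * slab_destroy() relies on above. Once a slab is being torn down, the
 * page's lru list linkage is no longer used, so the same bytes can carry
 * an rcu_head for the deferred kmem_freepages() call; the BUILD_BUG_ON
 * added to kmem_cache_init() earlier in the patch checks that it fits.
 * The toy structs below only model that size/overlay argument and are not
 * the kernel's struct page.
 */
#include <stddef.h>

struct toy_list_head { struct toy_list_head *next, *prev; };
struct toy_rcu_head  { struct toy_rcu_head *next; void (*func)(struct toy_rcu_head *); };

struct toy_page {
	union {
		struct toy_list_head lru;      /* slab list linkage while the slab is live */
		struct toy_rcu_head rcu_head;  /* reused once the slab is unlinked */
	};
	void *slab_cache;                      /* lets the RCU callback find the cache */
	void *freelist;
};

/* compile-time check mirroring the BUILD_BUG_ON added in kmem_cache_init() */
typedef char lru_big_enough[sizeof(struct toy_list_head) >=
			    sizeof(struct toy_rcu_head) ? 1 : -1];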
@@ -2097,8 +2016,8 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
2097 * use off-slab slabs. Needed to avoid a possible 2016 * use off-slab slabs. Needed to avoid a possible
2098 * looping condition in cache_grow(). 2017 * looping condition in cache_grow().
2099 */ 2018 */
2100 offslab_limit = size - sizeof(struct slab); 2019 offslab_limit = size;
2101 offslab_limit /= sizeof(kmem_bufctl_t); 2020 offslab_limit /= sizeof(unsigned int);
2102 2021
2103 if (num > offslab_limit) 2022 if (num > offslab_limit)
2104 break; 2023 break;
@@ -2220,7 +2139,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2220int 2139int
2221__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) 2140__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2222{ 2141{
2223 size_t left_over, slab_size, ralign; 2142 size_t left_over, freelist_size, ralign;
2224 gfp_t gfp; 2143 gfp_t gfp;
2225 int err; 2144 int err;
2226 size_t size = cachep->size; 2145 size_t size = cachep->size;
@@ -2339,22 +2258,21 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2339 if (!cachep->num) 2258 if (!cachep->num)
2340 return -E2BIG; 2259 return -E2BIG;
2341 2260
2342 slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) 2261 freelist_size =
2343 + sizeof(struct slab), cachep->align); 2262 ALIGN(cachep->num * sizeof(unsigned int), cachep->align);
2344 2263
2345 /* 2264 /*
2346 * If the slab has been placed off-slab, and we have enough space then 2265 * If the slab has been placed off-slab, and we have enough space then
2347 * move it on-slab. This is at the expense of any extra colouring. 2266 * move it on-slab. This is at the expense of any extra colouring.
2348 */ 2267 */
2349 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) { 2268 if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
2350 flags &= ~CFLGS_OFF_SLAB; 2269 flags &= ~CFLGS_OFF_SLAB;
2351 left_over -= slab_size; 2270 left_over -= freelist_size;
2352 } 2271 }
2353 2272
2354 if (flags & CFLGS_OFF_SLAB) { 2273 if (flags & CFLGS_OFF_SLAB) {
2355 /* really off slab. No need for manual alignment */ 2274 /* really off slab. No need for manual alignment */
2356 slab_size = 2275 freelist_size = cachep->num * sizeof(unsigned int);
2357 cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
2358 2276
2359#ifdef CONFIG_PAGE_POISONING 2277#ifdef CONFIG_PAGE_POISONING
2360 /* If we're going to use the generic kernel_map_pages() 2278 /* If we're going to use the generic kernel_map_pages()
@@ -2371,16 +2289,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2371 if (cachep->colour_off < cachep->align) 2289 if (cachep->colour_off < cachep->align)
2372 cachep->colour_off = cachep->align; 2290 cachep->colour_off = cachep->align;
2373 cachep->colour = left_over / cachep->colour_off; 2291 cachep->colour = left_over / cachep->colour_off;
2374 cachep->slab_size = slab_size; 2292 cachep->freelist_size = freelist_size;
2375 cachep->flags = flags; 2293 cachep->flags = flags;
2376 cachep->allocflags = 0; 2294 cachep->allocflags = __GFP_COMP;
2377 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) 2295 if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
2378 cachep->allocflags |= GFP_DMA; 2296 cachep->allocflags |= GFP_DMA;
2379 cachep->size = size; 2297 cachep->size = size;
2380 cachep->reciprocal_buffer_size = reciprocal_value(size); 2298 cachep->reciprocal_buffer_size = reciprocal_value(size);
2381 2299
2382 if (flags & CFLGS_OFF_SLAB) { 2300 if (flags & CFLGS_OFF_SLAB) {
2383 cachep->slabp_cache = kmalloc_slab(slab_size, 0u); 2301 cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
2384 /* 2302 /*
2385 * This is a possibility for one of the malloc_sizes caches. 2303 * This is a possibility for one of the malloc_sizes caches.
2386 * But since we go off slab only for object size greater than 2304 * But since we go off slab only for object size greater than
@@ -2388,7 +2306,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2388 * this should not happen at all. 2306 * this should not happen at all.
2389 * But leave a BUG_ON for some lucky dude. 2307 * But leave a BUG_ON for some lucky dude.
2390 */ 2308 */
2391 BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache)); 2309 BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
2392 } 2310 }
2393 2311
2394 err = setup_cpu_cache(cachep, gfp); 2312 err = setup_cpu_cache(cachep, gfp);
@@ -2494,7 +2412,7 @@ static int drain_freelist(struct kmem_cache *cache,
2494{ 2412{
2495 struct list_head *p; 2413 struct list_head *p;
2496 int nr_freed; 2414 int nr_freed;
2497 struct slab *slabp; 2415 struct page *page;
2498 2416
2499 nr_freed = 0; 2417 nr_freed = 0;
2500 while (nr_freed < tofree && !list_empty(&n->slabs_free)) { 2418 while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
@@ -2506,18 +2424,18 @@ static int drain_freelist(struct kmem_cache *cache,
2506 goto out; 2424 goto out;
2507 } 2425 }
2508 2426
2509 slabp = list_entry(p, struct slab, list); 2427 page = list_entry(p, struct page, lru);
2510#if DEBUG 2428#if DEBUG
2511 BUG_ON(slabp->inuse); 2429 BUG_ON(page->active);
2512#endif 2430#endif
2513 list_del(&slabp->list); 2431 list_del(&page->lru);
2514 /* 2432 /*
2515 * Safe to drop the lock. The slab is no longer linked 2433 * Safe to drop the lock. The slab is no longer linked
2516 * to the cache. 2434 * to the cache.
2517 */ 2435 */
2518 n->free_objects -= cache->num; 2436 n->free_objects -= cache->num;
2519 spin_unlock_irq(&n->list_lock); 2437 spin_unlock_irq(&n->list_lock);
2520 slab_destroy(cache, slabp); 2438 slab_destroy(cache, page);
2521 nr_freed++; 2439 nr_freed++;
2522 } 2440 }
2523out: 2441out:
@@ -2600,52 +2518,42 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
2600 * descriptors in kmem_cache_create, we search through the malloc_sizes array. 2518 * descriptors in kmem_cache_create, we search through the malloc_sizes array.
2601 * If we are creating a malloc_sizes cache here it would not be visible to 2519 * If we are creating a malloc_sizes cache here it would not be visible to
2602 * kmem_find_general_cachep till the initialization is complete. 2520 * kmem_find_general_cachep till the initialization is complete.
2603 * Hence we cannot have slabp_cache same as the original cache. 2521 * Hence we cannot have freelist_cache same as the original cache.
2604 */ 2522 */
2605static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, 2523static void *alloc_slabmgmt(struct kmem_cache *cachep,
2606 int colour_off, gfp_t local_flags, 2524 struct page *page, int colour_off,
2607 int nodeid) 2525 gfp_t local_flags, int nodeid)
2608{ 2526{
2609 struct slab *slabp; 2527 void *freelist;
2528 void *addr = page_address(page);
2610 2529
2611 if (OFF_SLAB(cachep)) { 2530 if (OFF_SLAB(cachep)) {
2612 /* Slab management obj is off-slab. */ 2531 /* Slab management obj is off-slab. */
2613 slabp = kmem_cache_alloc_node(cachep->slabp_cache, 2532 freelist = kmem_cache_alloc_node(cachep->freelist_cache,
2614 local_flags, nodeid); 2533 local_flags, nodeid);
2615 /* 2534 if (!freelist)
2616 * If the first object in the slab is leaked (it's allocated
2617 * but no one has a reference to it), we want to make sure
2618 * kmemleak does not treat the ->s_mem pointer as a reference
2619 * to the object. Otherwise we will not report the leak.
2620 */
2621 kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
2622 local_flags);
2623 if (!slabp)
2624 return NULL; 2535 return NULL;
2625 } else { 2536 } else {
2626 slabp = objp + colour_off; 2537 freelist = addr + colour_off;
2627 colour_off += cachep->slab_size; 2538 colour_off += cachep->freelist_size;
2628 } 2539 }
2629 slabp->inuse = 0; 2540 page->active = 0;
2630 slabp->colouroff = colour_off; 2541 page->s_mem = addr + colour_off;
2631 slabp->s_mem = objp + colour_off; 2542 return freelist;
2632 slabp->nodeid = nodeid;
2633 slabp->free = 0;
2634 return slabp;
2635} 2543}
2636 2544
2637static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp) 2545static inline unsigned int *slab_freelist(struct page *page)
2638{ 2546{
2639 return (kmem_bufctl_t *) (slabp + 1); 2547 return (unsigned int *)(page->freelist);
2640} 2548}
2641 2549
2642static void cache_init_objs(struct kmem_cache *cachep, 2550static void cache_init_objs(struct kmem_cache *cachep,
2643 struct slab *slabp) 2551 struct page *page)
2644{ 2552{
2645 int i; 2553 int i;
2646 2554
2647 for (i = 0; i < cachep->num; i++) { 2555 for (i = 0; i < cachep->num; i++) {
2648 void *objp = index_to_obj(cachep, slabp, i); 2556 void *objp = index_to_obj(cachep, page, i);
2649#if DEBUG 2557#if DEBUG
2650 /* need to poison the objs? */ 2558 /* need to poison the objs? */
2651 if (cachep->flags & SLAB_POISON) 2559 if (cachep->flags & SLAB_POISON)
@@ -2681,9 +2589,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
2681 if (cachep->ctor) 2589 if (cachep->ctor)
2682 cachep->ctor(objp); 2590 cachep->ctor(objp);
2683#endif 2591#endif
2684 slab_bufctl(slabp)[i] = i + 1; 2592 slab_freelist(page)[i] = i;
2685 } 2593 }
2686 slab_bufctl(slabp)[i - 1] = BUFCTL_END;
2687} 2594}
2688 2595
2689static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2596static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
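/*
 * Illustrative sketch, not part of the patch: the on-slab layout produced
 * by alloc_slabmgmt() above when the cache is not OFF_SLAB. The freelist
 * array sits at this slab's colour offset inside the page, the objects
 * start right after it, and page->s_mem records that start. All numbers
 * below are example values consistent with the estimate sketch earlier.
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_addr = 0x1000;   /* pretend page_address(page) */
	unsigned int num = 15, size = 256;  /* example cache geometry */
	unsigned int colour_off = 32;       /* this slab's colour offset */
	unsigned int freelist_size = 64;    /* ALIGN(num * sizeof(unsigned int), align) */
	unsigned long freelist = page_addr + colour_off;
	unsigned long s_mem = freelist + freelist_size;   /* page->s_mem */
	unsigned int i;

	for (i = 0; i < num; i++)
		printf("obj %2u at %#lx\n", i, s_mem + (unsigned long)i * size);
	return 0;
}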
@@ -2696,41 +2603,41 @@ static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2696 } 2603 }
2697} 2604}
2698 2605
2699static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, 2606static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
2700 int nodeid) 2607 int nodeid)
2701{ 2608{
2702 void *objp = index_to_obj(cachep, slabp, slabp->free); 2609 void *objp;
2703 kmem_bufctl_t next;
2704 2610
2705 slabp->inuse++; 2611 objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]);
2706 next = slab_bufctl(slabp)[slabp->free]; 2612 page->active++;
2707#if DEBUG 2613#if DEBUG
2708 slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE; 2614 WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
2709 WARN_ON(slabp->nodeid != nodeid);
2710#endif 2615#endif
2711 slabp->free = next;
2712 2616
2713 return objp; 2617 return objp;
2714} 2618}
2715 2619
2716static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, 2620static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
2717 void *objp, int nodeid) 2621 void *objp, int nodeid)
2718{ 2622{
2719 unsigned int objnr = obj_to_index(cachep, slabp, objp); 2623 unsigned int objnr = obj_to_index(cachep, page, objp);
2720
2721#if DEBUG 2624#if DEBUG
2625 unsigned int i;
2626
2722 /* Verify that the slab belongs to the intended node */ 2627 /* Verify that the slab belongs to the intended node */
2723 WARN_ON(slabp->nodeid != nodeid); 2628 WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
2724 2629
2725 if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { 2630 /* Verify double free bug */
2726 printk(KERN_ERR "slab: double free detected in cache " 2631 for (i = page->active; i < cachep->num; i++) {
2727 "'%s', objp %p\n", cachep->name, objp); 2632 if (slab_freelist(page)[i] == objnr) {
2728 BUG(); 2633 printk(KERN_ERR "slab: double free detected in cache "
2634 "'%s', objp %p\n", cachep->name, objp);
2635 BUG();
2636 }
2729 } 2637 }
2730#endif 2638#endif
2731 slab_bufctl(slabp)[objnr] = slabp->free; 2639 page->active--;
2732 slabp->free = objnr; 2640 slab_freelist(page)[page->active] = objnr;
2733 slabp->inuse--;
2734} 2641}
2735 2642
2736/* 2643/*
@@ -2738,23 +2645,11 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2738 * for the slab allocator to be able to lookup the cache and slab of a 2645 * for the slab allocator to be able to lookup the cache and slab of a
2739 * virtual address for kfree, ksize, and slab debugging. 2646 * virtual address for kfree, ksize, and slab debugging.
2740 */ 2647 */
2741static void slab_map_pages(struct kmem_cache *cache, struct slab *slab, 2648static void slab_map_pages(struct kmem_cache *cache, struct page *page,
2742 void *addr) 2649 void *freelist)
2743{ 2650{
2744 int nr_pages; 2651 page->slab_cache = cache;
2745 struct page *page; 2652 page->freelist = freelist;
2746
2747 page = virt_to_page(addr);
2748
2749 nr_pages = 1;
2750 if (likely(!PageCompound(page)))
2751 nr_pages <<= cache->gfporder;
2752
2753 do {
2754 page->slab_cache = cache;
2755 page->slab_page = slab;
2756 page++;
2757 } while (--nr_pages);
2758} 2653}
2759 2654
2760/* 2655/*
@@ -2762,9 +2657,9 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
2762 * kmem_cache_alloc() when there are no active objs left in a cache. 2657 * kmem_cache_alloc() when there are no active objs left in a cache.
2763 */ 2658 */
2764static int cache_grow(struct kmem_cache *cachep, 2659static int cache_grow(struct kmem_cache *cachep,
2765 gfp_t flags, int nodeid, void *objp) 2660 gfp_t flags, int nodeid, struct page *page)
2766{ 2661{
2767 struct slab *slabp; 2662 void *freelist;
2768 size_t offset; 2663 size_t offset;
2769 gfp_t local_flags; 2664 gfp_t local_flags;
2770 struct kmem_cache_node *n; 2665 struct kmem_cache_node *n;
@@ -2805,20 +2700,20 @@ static int cache_grow(struct kmem_cache *cachep,
2805 * Get mem for the objs. Attempt to allocate a physical page from 2700 * Get mem for the objs. Attempt to allocate a physical page from
2806 * 'nodeid'. 2701 * 'nodeid'.
2807 */ 2702 */
2808 if (!objp) 2703 if (!page)
2809 objp = kmem_getpages(cachep, local_flags, nodeid); 2704 page = kmem_getpages(cachep, local_flags, nodeid);
2810 if (!objp) 2705 if (!page)
2811 goto failed; 2706 goto failed;
2812 2707
2813 /* Get slab management. */ 2708 /* Get slab management. */
2814 slabp = alloc_slabmgmt(cachep, objp, offset, 2709 freelist = alloc_slabmgmt(cachep, page, offset,
2815 local_flags & ~GFP_CONSTRAINT_MASK, nodeid); 2710 local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2816 if (!slabp) 2711 if (!freelist)
2817 goto opps1; 2712 goto opps1;
2818 2713
2819 slab_map_pages(cachep, slabp, objp); 2714 slab_map_pages(cachep, page, freelist);
2820 2715
2821 cache_init_objs(cachep, slabp); 2716 cache_init_objs(cachep, page);
2822 2717
2823 if (local_flags & __GFP_WAIT) 2718 if (local_flags & __GFP_WAIT)
2824 local_irq_disable(); 2719 local_irq_disable();
@@ -2826,13 +2721,13 @@ static int cache_grow(struct kmem_cache *cachep,
2826 spin_lock(&n->list_lock); 2721 spin_lock(&n->list_lock);
2827 2722
2828 /* Make slab active. */ 2723 /* Make slab active. */
2829 list_add_tail(&slabp->list, &(n->slabs_free)); 2724 list_add_tail(&page->lru, &(n->slabs_free));
2830 STATS_INC_GROWN(cachep); 2725 STATS_INC_GROWN(cachep);
2831 n->free_objects += cachep->num; 2726 n->free_objects += cachep->num;
2832 spin_unlock(&n->list_lock); 2727 spin_unlock(&n->list_lock);
2833 return 1; 2728 return 1;
2834opps1: 2729opps1:
2835 kmem_freepages(cachep, objp); 2730 kmem_freepages(cachep, page);
2836failed: 2731failed:
2837 if (local_flags & __GFP_WAIT) 2732 if (local_flags & __GFP_WAIT)
2838 local_irq_disable(); 2733 local_irq_disable();
@@ -2880,9 +2775,8 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
2880static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, 2775static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2881 unsigned long caller) 2776 unsigned long caller)
2882{ 2777{
2883 struct page *page;
2884 unsigned int objnr; 2778 unsigned int objnr;
2885 struct slab *slabp; 2779 struct page *page;
2886 2780
2887 BUG_ON(virt_to_cache(objp) != cachep); 2781 BUG_ON(virt_to_cache(objp) != cachep);
2888 2782
@@ -2890,8 +2784,6 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2890 kfree_debugcheck(objp); 2784 kfree_debugcheck(objp);
2891 page = virt_to_head_page(objp); 2785 page = virt_to_head_page(objp);
2892 2786
2893 slabp = page->slab_page;
2894
2895 if (cachep->flags & SLAB_RED_ZONE) { 2787 if (cachep->flags & SLAB_RED_ZONE) {
2896 verify_redzone_free(cachep, objp); 2788 verify_redzone_free(cachep, objp);
2897 *dbg_redzone1(cachep, objp) = RED_INACTIVE; 2789 *dbg_redzone1(cachep, objp) = RED_INACTIVE;
@@ -2900,14 +2792,11 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2900 if (cachep->flags & SLAB_STORE_USER) 2792 if (cachep->flags & SLAB_STORE_USER)
2901 *dbg_userword(cachep, objp) = (void *)caller; 2793 *dbg_userword(cachep, objp) = (void *)caller;
2902 2794
2903 objnr = obj_to_index(cachep, slabp, objp); 2795 objnr = obj_to_index(cachep, page, objp);
2904 2796
2905 BUG_ON(objnr >= cachep->num); 2797 BUG_ON(objnr >= cachep->num);
2906 BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); 2798 BUG_ON(objp != index_to_obj(cachep, page, objnr));
2907 2799
2908#ifdef CONFIG_DEBUG_SLAB_LEAK
2909 slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
2910#endif
2911 if (cachep->flags & SLAB_POISON) { 2800 if (cachep->flags & SLAB_POISON) {
2912#ifdef CONFIG_DEBUG_PAGEALLOC 2801#ifdef CONFIG_DEBUG_PAGEALLOC
2913 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { 2802 if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
@@ -2924,33 +2813,9 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2924 return objp; 2813 return objp;
2925} 2814}
2926 2815
2927static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2928{
2929 kmem_bufctl_t i;
2930 int entries = 0;
2931
2932 /* Check slab's freelist to see if this obj is there. */
2933 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
2934 entries++;
2935 if (entries > cachep->num || i >= cachep->num)
2936 goto bad;
2937 }
2938 if (entries != cachep->num - slabp->inuse) {
2939bad:
2940 printk(KERN_ERR "slab: Internal list corruption detected in "
2941 "cache '%s'(%d), slabp %p(%d). Tainted(%s). Hexdump:\n",
2942 cachep->name, cachep->num, slabp, slabp->inuse,
2943 print_tainted());
2944 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 16, 1, slabp,
2945 sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t),
2946 1);
2947 BUG();
2948 }
2949}
2950#else 2816#else
2951#define kfree_debugcheck(x) do { } while(0) 2817#define kfree_debugcheck(x) do { } while(0)
2952#define cache_free_debugcheck(x,objp,z) (objp) 2818#define cache_free_debugcheck(x,objp,z) (objp)
2953#define check_slabp(x,y) do { } while(0)
2954#endif 2819#endif
2955 2820
2956static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, 2821static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
@@ -2989,7 +2854,7 @@ retry:
2989 2854
2990 while (batchcount > 0) { 2855 while (batchcount > 0) {
2991 struct list_head *entry; 2856 struct list_head *entry;
2992 struct slab *slabp; 2857 struct page *page;
2993 /* Get slab alloc is to come from. */ 2858 /* Get slab alloc is to come from. */
2994 entry = n->slabs_partial.next; 2859 entry = n->slabs_partial.next;
2995 if (entry == &n->slabs_partial) { 2860 if (entry == &n->slabs_partial) {
@@ -2999,8 +2864,7 @@ retry:
2999 goto must_grow; 2864 goto must_grow;
3000 } 2865 }
3001 2866
3002 slabp = list_entry(entry, struct slab, list); 2867 page = list_entry(entry, struct page, lru);
3003 check_slabp(cachep, slabp);
3004 check_spinlock_acquired(cachep); 2868 check_spinlock_acquired(cachep);
3005 2869
3006 /* 2870 /*
@@ -3008,24 +2872,23 @@ retry:
3008 * there must be at least one object available for 2872 * there must be at least one object available for
3009 * allocation. 2873 * allocation.
3010 */ 2874 */
3011 BUG_ON(slabp->inuse >= cachep->num); 2875 BUG_ON(page->active >= cachep->num);
3012 2876
3013 while (slabp->inuse < cachep->num && batchcount--) { 2877 while (page->active < cachep->num && batchcount--) {
3014 STATS_INC_ALLOCED(cachep); 2878 STATS_INC_ALLOCED(cachep);
3015 STATS_INC_ACTIVE(cachep); 2879 STATS_INC_ACTIVE(cachep);
3016 STATS_SET_HIGH(cachep); 2880 STATS_SET_HIGH(cachep);
3017 2881
3018 ac_put_obj(cachep, ac, slab_get_obj(cachep, slabp, 2882 ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
3019 node)); 2883 node));
3020 } 2884 }
3021 check_slabp(cachep, slabp);
3022 2885
3023 /* move slabp to correct slabp list: */ 2886 /* move slabp to correct slabp list: */
3024 list_del(&slabp->list); 2887 list_del(&page->lru);
3025 if (slabp->free == BUFCTL_END) 2888 if (page->active == cachep->num)
3026 list_add(&slabp->list, &n->slabs_full); 2889 list_add(&page->list, &n->slabs_full);
3027 else 2890 else
3028 list_add(&slabp->list, &n->slabs_partial); 2891 list_add(&page->list, &n->slabs_partial);
3029 } 2892 }
3030 2893
3031must_grow: 2894must_grow:
@@ -3097,16 +2960,6 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
3097 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 2960 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
3098 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 2961 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
3099 } 2962 }
3100#ifdef CONFIG_DEBUG_SLAB_LEAK
3101 {
3102 struct slab *slabp;
3103 unsigned objnr;
3104
3105 slabp = virt_to_head_page(objp)->slab_page;
3106 objnr = (unsigned)(objp - slabp->s_mem) / cachep->size;
3107 slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
3108 }
3109#endif
3110 objp += obj_offset(cachep); 2963 objp += obj_offset(cachep);
3111 if (cachep->ctor && cachep->flags & SLAB_POISON) 2964 if (cachep->ctor && cachep->flags & SLAB_POISON)
3112 cachep->ctor(objp); 2965 cachep->ctor(objp);
@@ -3248,18 +3101,20 @@ retry:
3248 * We may trigger various forms of reclaim on the allowed 3101 * We may trigger various forms of reclaim on the allowed
3249 * set and go into memory reserves if necessary. 3102 * set and go into memory reserves if necessary.
3250 */ 3103 */
3104 struct page *page;
3105
3251 if (local_flags & __GFP_WAIT) 3106 if (local_flags & __GFP_WAIT)
3252 local_irq_enable(); 3107 local_irq_enable();
3253 kmem_flagcheck(cache, flags); 3108 kmem_flagcheck(cache, flags);
3254 obj = kmem_getpages(cache, local_flags, numa_mem_id()); 3109 page = kmem_getpages(cache, local_flags, numa_mem_id());
3255 if (local_flags & __GFP_WAIT) 3110 if (local_flags & __GFP_WAIT)
3256 local_irq_disable(); 3111 local_irq_disable();
3257 if (obj) { 3112 if (page) {
3258 /* 3113 /*
3259 * Insert into the appropriate per node queues 3114 * Insert into the appropriate per node queues
3260 */ 3115 */
3261 nid = page_to_nid(virt_to_page(obj)); 3116 nid = page_to_nid(page);
3262 if (cache_grow(cache, flags, nid, obj)) { 3117 if (cache_grow(cache, flags, nid, page)) {
3263 obj = ____cache_alloc_node(cache, 3118 obj = ____cache_alloc_node(cache,
3264 flags | GFP_THISNODE, nid); 3119 flags | GFP_THISNODE, nid);
3265 if (!obj) 3120 if (!obj)
@@ -3288,7 +3143,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3288 int nodeid) 3143 int nodeid)
3289{ 3144{
3290 struct list_head *entry; 3145 struct list_head *entry;
3291 struct slab *slabp; 3146 struct page *page;
3292 struct kmem_cache_node *n; 3147 struct kmem_cache_node *n;
3293 void *obj; 3148 void *obj;
3294 int x; 3149 int x;
@@ -3308,26 +3163,24 @@ retry:
3308 goto must_grow; 3163 goto must_grow;
3309 } 3164 }
3310 3165
3311 slabp = list_entry(entry, struct slab, list); 3166 page = list_entry(entry, struct page, lru);
3312 check_spinlock_acquired_node(cachep, nodeid); 3167 check_spinlock_acquired_node(cachep, nodeid);
3313 check_slabp(cachep, slabp);
3314 3168
3315 STATS_INC_NODEALLOCS(cachep); 3169 STATS_INC_NODEALLOCS(cachep);
3316 STATS_INC_ACTIVE(cachep); 3170 STATS_INC_ACTIVE(cachep);
3317 STATS_SET_HIGH(cachep); 3171 STATS_SET_HIGH(cachep);
3318 3172
3319 BUG_ON(slabp->inuse == cachep->num); 3173 BUG_ON(page->active == cachep->num);
3320 3174
3321 obj = slab_get_obj(cachep, slabp, nodeid); 3175 obj = slab_get_obj(cachep, page, nodeid);
3322 check_slabp(cachep, slabp);
3323 n->free_objects--; 3176 n->free_objects--;
3324 /* move slabp to correct slabp list: */ 3177 /* move slabp to correct slabp list: */
3325 list_del(&slabp->list); 3178 list_del(&page->lru);
3326 3179
3327 if (slabp->free == BUFCTL_END) 3180 if (page->active == cachep->num)
3328 list_add(&slabp->list, &n->slabs_full); 3181 list_add(&page->lru, &n->slabs_full);
3329 else 3182 else
3330 list_add(&slabp->list, &n->slabs_partial); 3183 list_add(&page->lru, &n->slabs_partial);
3331 3184
3332 spin_unlock(&n->list_lock); 3185 spin_unlock(&n->list_lock);
3333 goto done; 3186 goto done;
@@ -3477,23 +3330,21 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3477 3330
3478 for (i = 0; i < nr_objects; i++) { 3331 for (i = 0; i < nr_objects; i++) {
3479 void *objp; 3332 void *objp;
3480 struct slab *slabp; 3333 struct page *page;
3481 3334
3482 clear_obj_pfmemalloc(&objpp[i]); 3335 clear_obj_pfmemalloc(&objpp[i]);
3483 objp = objpp[i]; 3336 objp = objpp[i];
3484 3337
3485 slabp = virt_to_slab(objp); 3338 page = virt_to_head_page(objp);
3486 n = cachep->node[node]; 3339 n = cachep->node[node];
3487 list_del(&slabp->list); 3340 list_del(&page->lru);
3488 check_spinlock_acquired_node(cachep, node); 3341 check_spinlock_acquired_node(cachep, node);
3489 check_slabp(cachep, slabp); 3342 slab_put_obj(cachep, page, objp, node);
3490 slab_put_obj(cachep, slabp, objp, node);
3491 STATS_DEC_ACTIVE(cachep); 3343 STATS_DEC_ACTIVE(cachep);
3492 n->free_objects++; 3344 n->free_objects++;
3493 check_slabp(cachep, slabp);
3494 3345
3495 /* fixup slab chains */ 3346 /* fixup slab chains */
3496 if (slabp->inuse == 0) { 3347 if (page->active == 0) {
3497 if (n->free_objects > n->free_limit) { 3348 if (n->free_objects > n->free_limit) {
3498 n->free_objects -= cachep->num; 3349 n->free_objects -= cachep->num;
3499 /* No need to drop any previously held 3350 /* No need to drop any previously held
@@ -3502,16 +3353,16 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3502 * a different cache, refer to comments before 3353 * a different cache, refer to comments before
3503 * alloc_slabmgmt. 3354 * alloc_slabmgmt.
3504 */ 3355 */
3505 slab_destroy(cachep, slabp); 3356 slab_destroy(cachep, page);
3506 } else { 3357 } else {
3507 list_add(&slabp->list, &n->slabs_free); 3358 list_add(&page->lru, &n->slabs_free);
3508 } 3359 }
3509 } else { 3360 } else {
3510 /* Unconditionally move a slab to the end of the 3361 /* Unconditionally move a slab to the end of the
3511 * partial list on free - maximum time for the 3362 * partial list on free - maximum time for the
3512 * other objects to be freed, too. 3363 * other objects to be freed, too.
3513 */ 3364 */
3514 list_add_tail(&slabp->list, &n->slabs_partial); 3365 list_add_tail(&page->lru, &n->slabs_partial);
3515 } 3366 }
3516 } 3367 }
3517} 3368}
@@ -3551,10 +3402,10 @@ free_done:
3551 3402
3552 p = n->slabs_free.next; 3403 p = n->slabs_free.next;
3553 while (p != &(n->slabs_free)) { 3404 while (p != &(n->slabs_free)) {
3554 struct slab *slabp; 3405 struct page *page;
3555 3406
3556 slabp = list_entry(p, struct slab, list); 3407 page = list_entry(p, struct page, lru);
3557 BUG_ON(slabp->inuse); 3408 BUG_ON(page->active);
3558 3409
3559 i++; 3410 i++;
3560 p = p->next; 3411 p = p->next;
@@ -4158,7 +4009,7 @@ out:
4158#ifdef CONFIG_SLABINFO 4009#ifdef CONFIG_SLABINFO
4159void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo) 4010void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4160{ 4011{
4161 struct slab *slabp; 4012 struct page *page;
4162 unsigned long active_objs; 4013 unsigned long active_objs;
4163 unsigned long num_objs; 4014 unsigned long num_objs;
4164 unsigned long active_slabs = 0; 4015 unsigned long active_slabs = 0;
@@ -4178,23 +4029,23 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4178 check_irq_on(); 4029 check_irq_on();
4179 spin_lock_irq(&n->list_lock); 4030 spin_lock_irq(&n->list_lock);
4180 4031
4181 list_for_each_entry(slabp, &n->slabs_full, list) { 4032 list_for_each_entry(page, &n->slabs_full, lru) {
4182 if (slabp->inuse != cachep->num && !error) 4033 if (page->active != cachep->num && !error)
4183 error = "slabs_full accounting error"; 4034 error = "slabs_full accounting error";
4184 active_objs += cachep->num; 4035 active_objs += cachep->num;
4185 active_slabs++; 4036 active_slabs++;
4186 } 4037 }
4187 list_for_each_entry(slabp, &n->slabs_partial, list) { 4038 list_for_each_entry(page, &n->slabs_partial, lru) {
4188 if (slabp->inuse == cachep->num && !error) 4039 if (page->active == cachep->num && !error)
4189 error = "slabs_partial inuse accounting error"; 4040 error = "slabs_partial accounting error";
4190 if (!slabp->inuse && !error) 4041 if (!page->active && !error)
4191 error = "slabs_partial/inuse accounting error"; 4042 error = "slabs_partial accounting error";
4192 active_objs += slabp->inuse; 4043 active_objs += page->active;
4193 active_slabs++; 4044 active_slabs++;
4194 } 4045 }
4195 list_for_each_entry(slabp, &n->slabs_free, list) { 4046 list_for_each_entry(page, &n->slabs_free, lru) {
4196 if (slabp->inuse && !error) 4047 if (page->active && !error)
4197 error = "slabs_free/inuse accounting error"; 4048 error = "slabs_free accounting error";
4198 num_slabs++; 4049 num_slabs++;
4199 } 4050 }
4200 free_objects += n->free_objects; 4051 free_objects += n->free_objects;
@@ -4346,15 +4197,27 @@ static inline int add_caller(unsigned long *n, unsigned long v)
4346 return 1; 4197 return 1;
4347} 4198}
4348 4199
4349static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) 4200static void handle_slab(unsigned long *n, struct kmem_cache *c,
4201 struct page *page)
4350{ 4202{
4351 void *p; 4203 void *p;
4352 int i; 4204 int i, j;
4205
4353 if (n[0] == n[1]) 4206 if (n[0] == n[1])
4354 return; 4207 return;
4355 for (i = 0, p = s->s_mem; i < c->num; i++, p += c->size) { 4208 for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
4356 if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) 4209 bool active = true;
4210
4211 for (j = page->active; j < c->num; j++) {
4212 /* Skip freed item */
4213 if (slab_freelist(page)[j] == i) {
4214 active = false;
4215 break;
4216 }
4217 }
4218 if (!active)
4357 continue; 4219 continue;
4220
4358 if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) 4221 if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
4359 return; 4222 return;
4360 } 4223 }
@@ -4379,7 +4242,7 @@ static void show_symbol(struct seq_file *m, unsigned long address)
4379static int leaks_show(struct seq_file *m, void *p) 4242static int leaks_show(struct seq_file *m, void *p)
4380{ 4243{
4381 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); 4244 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4382 struct slab *slabp; 4245 struct page *page;
4383 struct kmem_cache_node *n; 4246 struct kmem_cache_node *n;
4384 const char *name; 4247 const char *name;
4385 unsigned long *x = m->private; 4248 unsigned long *x = m->private;
@@ -4403,10 +4266,10 @@ static int leaks_show(struct seq_file *m, void *p)
4403 check_irq_on(); 4266 check_irq_on();
4404 spin_lock_irq(&n->list_lock); 4267 spin_lock_irq(&n->list_lock);
4405 4268
4406 list_for_each_entry(slabp, &n->slabs_full, list) 4269 list_for_each_entry(page, &n->slabs_full, lru)
4407 handle_slab(x, cachep, slabp); 4270 handle_slab(x, cachep, page);
4408 list_for_each_entry(slabp, &n->slabs_partial, list) 4271 list_for_each_entry(page, &n->slabs_partial, lru)
4409 handle_slab(x, cachep, slabp); 4272 handle_slab(x, cachep, page);
4410 spin_unlock_irq(&n->list_lock); 4273 spin_unlock_irq(&n->list_lock);
4411 } 4274 }
4412 name = cachep->name; 4275 name = cachep->name;