Diffstat (limited to 'mm/slub.c')
 -rw-r--r--  mm/slub.c  368
 1 file changed, 129 insertions(+), 239 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 4996fc719552..d2a54fe71ea2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -151,7 +151,8 @@
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
+		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
+		SLAB_FAILSLAB)
 
 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
 		SLAB_CACHE_DMA | SLAB_NOTRACK)
@@ -217,10 +218,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 
 #endif
 
-static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
+static inline void stat(struct kmem_cache *s, enum stat_item si)
 {
 #ifdef CONFIG_SLUB_STATS
-	c->stat[si]++;
+	__this_cpu_inc(s->cpu_slab->stat[si]);
 #endif
 }
 
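The new stat() is the heart of this conversion: rather than resolving the current CPU's kmem_cache_cpu pointer by hand, it bumps the counter through the this_cpu API, which folds the per-cpu base offset into the increment itself. A minimal self-contained sketch of the same pattern (demo_events, demo_count_event and demo_total_events are hypothetical names, not part of the patch):

#include <linux/percpu.h>

/* One counter per CPU; DEFINE_PER_CPU gives static per-cpu storage. */
static DEFINE_PER_CPU(unsigned long, demo_events);

static inline void demo_count_event(void)
{
	/*
	 * The double-underscore variant assumes the caller already runs
	 * non-preemptibly, as SLUB's alloc/free paths do under
	 * local_irq_save().
	 */
	__this_cpu_inc(demo_events);
}

static unsigned long demo_total_events(void)
{
	unsigned long sum = 0;
	int cpu;

	/* Fold the per-cpu copies together, as show_stat() does for sysfs. */
	for_each_online_cpu(cpu)
		sum += per_cpu(demo_events, cpu);
	return sum;
}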
@@ -242,15 +243,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
-static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
-{
-#ifdef CONFIG_SMP
-	return s->cpu_slab[cpu];
-#else
-	return &s->cpu_slab;
-#endif
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
 		struct page *page, const void *object)
@@ -269,13 +261,6 @@ static inline int check_valid_pointer(struct kmem_cache *s,
 	return 1;
 }
 
-/*
- * Slow version of get and set free pointer.
- *
- * This version requires touching the cache lines of kmem_cache which
- * we avoid to do in the fast alloc free paths. There we obtain the offset
- * from the page struct.
- */
 static inline void *get_freepointer(struct kmem_cache *s, void *object)
 {
 	return *(void **)(object + s->offset);
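With the kmem_cache_cpu copies of offset and objsize gone, get_freepointer() and its set_freepointer() counterpart carry the whole freelist encoding: a free object stores the address of the next free object inside itself, s->offset bytes in. A user-space model of that encoding (all names hypothetical):

#include <stddef.h>

/* Read the embedded next-free pointer of a free object. */
static inline void *get_free(void *object, size_t offset)
{
	return *(void **)((char *)object + offset);
}

/* Link a free object to another by writing the pointer in place. */
static inline void set_free(void *object, size_t offset, void *next)
{
	*(void **)((char *)object + offset) = next;
}

Popping the freelist head is then object = head; head = get_free(object, offset); which is exactly the shape of the fastpaths further down.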
@@ -1020,6 +1005,9 @@ static int __init setup_slub_debug(char *str)
 		case 't':
 			slub_debug |= SLAB_TRACE;
 			break;
+		case 'a':
+			slub_debug |= SLAB_FAILSLAB;
+			break;
 		default:
 			printk(KERN_ERR "slub_debug option '%c' "
 				"unknown. skipped\n", *str);
@@ -1124,7 +1112,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		if (!page)
 			return NULL;
 
-		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
+		stat(s, ORDER_FALLBACK);
 	}
 
 	if (kmemcheck_enabled
@@ -1422,23 +1410,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
 	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
 
 		if (page->freelist) {
 			add_partial(n, page, tail);
-			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+			stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
 		} else {
-			stat(c, DEACTIVATE_FULL);
+			stat(s, DEACTIVATE_FULL);
 			if (SLABDEBUG && PageSlubDebug(page) &&
 					(s->flags & SLAB_STORE_USER))
 				add_full(n, page);
 		}
 		slab_unlock(page);
 	} else {
-		stat(c, DEACTIVATE_EMPTY);
+		stat(s, DEACTIVATE_EMPTY);
 		if (n->nr_partial < s->min_partial) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
@@ -1454,7 +1441,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
-			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
+			stat(s, FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1469,7 +1456,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 	int tail = 1;
 
 	if (page->freelist)
-		stat(c, DEACTIVATE_REMOTE_FREES);
+		stat(s, DEACTIVATE_REMOTE_FREES);
 	/*
 	 * Merge cpu freelist into slab freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
@@ -1482,10 +1469,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 		/* Retrieve object from cpu_freelist */
 		object = c->freelist;
-		c->freelist = c->freelist[c->offset];
+		c->freelist = get_freepointer(s, c->freelist);
 
 		/* And put onto the regular freelist */
-		object[c->offset] = page->freelist;
+		set_freepointer(s, object, page->freelist);
 		page->freelist = object;
 		page->inuse--;
 	}
@@ -1495,7 +1482,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
-	stat(c, CPUSLAB_FLUSH);
+	stat(s, CPUSLAB_FLUSH);
 	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
@@ -1507,7 +1494,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 	if (likely(c && c->page))
 		flush_slab(s, c);
@@ -1635,7 +1622,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
 
-	stat(c, ALLOC_REFILL);
+	stat(s, ALLOC_REFILL);
 
 load_freelist:
 	object = c->page->freelist;
@@ -1644,13 +1631,13 @@ load_freelist:
 	if (unlikely(SLABDEBUG && PageSlubDebug(c->page)))
 		goto debug;
 
-	c->freelist = object[c->offset];
+	c->freelist = get_freepointer(s, object);
 	c->page->inuse = c->page->objects;
 	c->page->freelist = NULL;
 	c->node = page_to_nid(c->page);
 unlock_out:
 	slab_unlock(c->page);
-	stat(c, ALLOC_SLOWPATH);
+	stat(s, ALLOC_SLOWPATH);
 	return object;
 
 another_slab:
@@ -1660,7 +1647,7 @@ new_slab:
 	new = get_partial(s, gfpflags, node);
 	if (new) {
 		c->page = new;
-		stat(c, ALLOC_FROM_PARTIAL);
+		stat(s, ALLOC_FROM_PARTIAL);
 		goto load_freelist;
 	}
 
@@ -1673,8 +1660,8 @@ new_slab:
 	local_irq_disable();
 
 	if (new) {
-		c = get_cpu_slab(s, smp_processor_id());
-		stat(c, ALLOC_SLAB);
+		c = __this_cpu_ptr(s->cpu_slab);
+		stat(s, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
@@ -1690,7 +1677,7 @@ debug:
 	goto another_slab;
 
 	c->page->inuse++;
-	c->page->freelist = object[c->offset];
+	c->page->freelist = get_freepointer(s, object);
 	c->node = -1;
 	goto unlock_out;
 }
@@ -1711,35 +1698,33 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned int objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
 	lockdep_trace_alloc(gfpflags);
 	might_sleep_if(gfpflags & __GFP_WAIT);
 
-	if (should_failslab(s->objsize, gfpflags))
+	if (should_failslab(s->objsize, gfpflags, s->flags))
 		return NULL;
 
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
-	objsize = c->objsize;
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	c = __this_cpu_ptr(s->cpu_slab);
+	object = c->freelist;
+	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = c->freelist;
-		c->freelist = object[c->offset];
-		stat(c, ALLOC_FASTPATH);
+		c->freelist = get_freepointer(s, object);
+		stat(s, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
 
-	if (unlikely((gfpflags & __GFP_ZERO) && object))
-		memset(object, 0, objsize);
+	if (unlikely(gfpflags & __GFP_ZERO) && object)
+		memset(object, 0, s->objsize);
 
-	kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
-	kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+	kmemcheck_slab_alloc(s, gfpflags, object, s->objsize);
+	kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags);
 
 	return object;
 }
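After the rewrite the allocation fastpath is a single freelist pop under local_irq_save(), with everything else deferred to __slab_alloc(). A hypothetical user-space model of that control flow (cpu_cache, slow_alloc and fast_alloc are invented stand-ins, not SLUB symbols):

struct cpu_cache {
	void *freelist;		/* head of this CPU's freelist */
	size_t offset;		/* where the next-free pointer lives */
};

void *slow_alloc(struct cpu_cache *c);	/* refill from a slab page */

void *fast_alloc(struct cpu_cache *c)
{
	void *object = c->freelist;

	if (!object)
		return slow_alloc(c);

	/* Pop the head; the next pointer is embedded in the object. */
	c->freelist = *(void **)((char *)object + c->offset);
	return object;
}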
@@ -1754,7 +1739,7 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
 {
 	return slab_alloc(s, gfpflags, -1, _RET_IP_);
@@ -1775,7 +1760,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 #endif
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
 				    gfp_t gfpflags,
 				    int node)
@@ -1794,26 +1779,25 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
  * handling required then we can return immediately.
  */
 static void __slab_free(struct kmem_cache *s, struct page *page,
-			void *x, unsigned long addr, unsigned int offset)
+			void *x, unsigned long addr)
 {
 	void *prior;
 	void **object = (void *)x;
-	struct kmem_cache_cpu *c;
 
-	c = get_cpu_slab(s, raw_smp_processor_id());
-	stat(c, FREE_SLOWPATH);
+	stat(s, FREE_SLOWPATH);
 	slab_lock(page);
 
 	if (unlikely(SLABDEBUG && PageSlubDebug(page)))
 		goto debug;
 
 checks_ok:
-	prior = object[offset] = page->freelist;
+	prior = page->freelist;
+	set_freepointer(s, object, prior);
 	page->freelist = object;
 	page->inuse--;
 
 	if (unlikely(PageSlubFrozen(page))) {
-		stat(c, FREE_FROZEN);
+		stat(s, FREE_FROZEN);
 		goto out_unlock;
 	}
 
@@ -1826,7 +1810,7 @@ checks_ok:
 	 */
 	if (unlikely(!prior)) {
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
-		stat(c, FREE_ADD_PARTIAL);
+		stat(s, FREE_ADD_PARTIAL);
 	}
 
 out_unlock:
@@ -1839,10 +1823,10 @@ slab_empty:
 		 * Slab still on the partial list.
 		 */
 		remove_partial(s, page);
-		stat(c, FREE_REMOVE_PARTIAL);
+		stat(s, FREE_REMOVE_PARTIAL);
 	}
 	slab_unlock(page);
-	stat(c, FREE_SLAB);
+	stat(s, FREE_SLAB);
 	discard_slab(s, page);
 	return;
 
@@ -1872,17 +1856,17 @@ static __always_inline void slab_free(struct kmem_cache *s,
 
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
-	kmemcheck_slab_free(s, object, c->objsize);
-	debug_check_no_locks_freed(object, c->objsize);
+	c = __this_cpu_ptr(s->cpu_slab);
+	kmemcheck_slab_free(s, object, s->objsize);
+	debug_check_no_locks_freed(object, s->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
-		debug_check_no_obj_freed(object, c->objsize);
+		debug_check_no_obj_freed(object, s->objsize);
 	if (likely(page == c->page && c->node >= 0)) {
-		object[c->offset] = c->freelist;
+		set_freepointer(s, object, c->freelist);
 		c->freelist = object;
-		stat(c, FREE_FASTPATH);
+		stat(s, FREE_FASTPATH);
 	} else
-		__slab_free(s, page, x, addr, c->offset);
+		__slab_free(s, page, x, addr);
 
 	local_irq_restore(flags);
 }
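The free fastpath is the mirror image: if the object belongs to the page this CPU currently owns, it is pushed onto the per-cpu freelist without taking the page lock; anything else goes through __slab_free(). Continuing the hypothetical model sketched above:

/* Push a freed object onto the local freelist (slowpath omitted). */
void fast_free(struct cpu_cache *c, void *object)
{
	*(void **)((char *)object + c->offset) = c->freelist;
	c->freelist = object;
}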
@@ -2069,19 +2053,6 @@ static unsigned long calculate_alignment(unsigned long flags,
 	return ALIGN(align, sizeof(void *));
 }
 
-static void init_kmem_cache_cpu(struct kmem_cache *s,
-			struct kmem_cache_cpu *c)
-{
-	c->page = NULL;
-	c->freelist = NULL;
-	c->node = 0;
-	c->offset = s->offset / sizeof(void *);
-	c->objsize = s->objsize;
-#ifdef CONFIG_SLUB_STATS
-	memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned));
-#endif
-}
-
 static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
@@ -2095,130 +2066,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 #endif
 }
 
-#ifdef CONFIG_SMP
-/*
- * Per cpu array for per cpu structures.
- *
- * The per cpu array places all kmem_cache_cpu structures from one processor
- * close together meaning that it becomes possible that multiple per cpu
- * structures are contained in one cacheline. This may be particularly
- * beneficial for the kmalloc caches.
- *
- * A desktop system typically has around 60-80 slabs. With 100 here we are
- * likely able to get per cpu structures for all caches from the array defined
- * here. We must be able to cover all kmalloc caches during bootstrap.
- *
- * If the per cpu array is exhausted then fall back to kmalloc
- * of individual cachelines. No sharing is possible then.
- */
-#define NR_KMEM_CACHE_CPU 100
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
-				      kmem_cache_cpu);
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
-
-static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
-							int cpu, gfp_t flags)
-{
-	struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
-
-	if (c)
-		per_cpu(kmem_cache_cpu_free, cpu) =
-				(void *)c->freelist;
-	else {
-		/* Table overflow: So allocate ourselves */
-		c = kmalloc_node(
-			ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
-			flags, cpu_to_node(cpu));
-		if (!c)
-			return NULL;
-	}
-
-	init_kmem_cache_cpu(s, c);
-	return c;
-}
-
-static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
-{
-	if (c < per_cpu(kmem_cache_cpu, cpu) ||
-			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
-		kfree(c);
-		return;
-	}
-	c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
-	per_cpu(kmem_cache_cpu_free, cpu) = c;
-}
-
-static void free_kmem_cache_cpus(struct kmem_cache *s)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c) {
-			s->cpu_slab[cpu] = NULL;
-			free_kmem_cache_cpu(c, cpu);
-		}
-	}
-}
-
-static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c)
-			continue;
-
-		c = alloc_kmem_cache_cpu(s, cpu, flags);
-		if (!c) {
-			free_kmem_cache_cpus(s);
-			return 0;
-		}
-		s->cpu_slab[cpu] = c;
-	}
-	return 1;
-}
-
-/*
- * Initialize the per cpu array.
- */
-static void init_alloc_cpu_cpu(int cpu)
-{
-	int i;
-
-	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
-		return;
-
-	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
-		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
-
-	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
-}
-
-static void __init init_alloc_cpu(void)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		init_alloc_cpu_cpu(cpu);
-}
-
-#else
-static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
-static inline void init_alloc_cpu(void) {}
-
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	init_kmem_cache_cpu(s, &s->cpu_slab);
-	return 1;
-}
-#endif
+static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]);
+
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
+{
+	if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches)
+		/*
+		 * Boot time creation of the kmalloc array. Use static per cpu data
+		 * since the per cpu allocator is not available yet.
+		 */
+		s->cpu_slab = kmalloc_percpu + (s - kmalloc_caches);
+	else
+		s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
+
+	if (!s->cpu_slab)
+		return 0;
+
+	return 1;
+}
 
 #ifdef CONFIG_NUMA
 /*
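The new alloc_kmem_cache_cpus() collapses roughly 120 lines of hand-rolled per-cpu pooling into a choice between static bootstrap storage (for the kmalloc array, created before the per-cpu allocator is up) and alloc_percpu(). For any cache created after boot this is the ordinary dynamic per-cpu lifecycle; a sketch of that lifecycle in isolation (struct demo_stats and the demo_* functions are hypothetical):

#include <linux/errno.h>
#include <linux/percpu.h>

struct demo_stats {
	unsigned long hits;
};

static struct demo_stats __percpu *demo;

static int demo_init(void)
{
	demo = alloc_percpu(struct demo_stats);	/* one copy per CPU */
	return demo ? 0 : -ENOMEM;
}

static void demo_record_hit(void)
{
	__this_cpu_inc(demo->hits);	/* same idiom as stat() above */
}

static void demo_exit(void)
{
	free_percpu(demo);	/* mirrors kmem_cache_close() below */
}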
@@ -2287,7 +2152,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 	int node;
 	int local_node;
 
-	if (slab_state >= UP)
+	if (slab_state >= UP && (s < kmalloc_caches ||
+			s >= kmalloc_caches + KMALLOC_CACHES))
 		local_node = page_to_nid(virt_to_page(s));
 	else
 		local_node = 0;
@@ -2502,6 +2368,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags,
 
 	if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA))
 		return 1;
+
 	free_kmem_cache_nodes(s);
 error:
 	if (flags & SLAB_PANIC)
@@ -2519,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
 	struct page *page;
 
+	if (!kern_ptr_validate(object, s->size))
+		return 0;
+
 	page = get_object_page(object);
 
 	if (!page || s != page->slab)
@@ -2609,9 +2479,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 	int node;
 
 	flush_all(s);
-
+	free_percpu(s->cpu_slab);
 	/* Attempt to free all objects */
-	free_kmem_cache_cpus(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
@@ -2651,7 +2520,7 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  * Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 static int __init setup_slub_min_order(char *str)
@@ -2741,6 +2610,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	char *text;
 	size_t realsize;
 	unsigned long slabflags;
+	int i;
 
 	s = kmalloc_caches_dma[index];
 	if (s)
@@ -2760,7 +2630,14 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	realsize = kmalloc_caches[index].objsize;
 	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 			 (unsigned int)realsize);
-	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+	s = NULL;
+	for (i = 0; i < KMALLOC_CACHES; i++)
+		if (!kmalloc_caches[i].size)
+			break;
+
+	BUG_ON(i >= KMALLOC_CACHES);
+	s = kmalloc_caches + i;
 
 	/*
 	 * Must defer sysfs creation to a workqueue because we don't know
@@ -2772,9 +2649,9 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	if (slab_state >= SYSFS)
 		slabflags |= __SYSFS_ADD_DEFERRED;
 
-	if (!s || !text || !kmem_cache_open(s, flags, text,
+	if (!text || !kmem_cache_open(s, flags, text,
 			realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) {
-		kfree(s);
+		s->size = 0;
 		kfree(text);
 		goto unlock_out;
 	}
@@ -3086,7 +2963,7 @@ static void slab_mem_offline_callback(void *arg)
 		/*
 		 * if n->nr_slabs > 0, slabs still exist on the node
 		 * that is going down. We were unable to free them,
-		 * and offline_pages() function shoudn't call this
+		 * and offline_pages() function shouldn't call this
 		 * callback. So, we must fail.
 		 */
 		BUG_ON(slabs_node(s, offline_node));
@@ -3176,8 +3053,6 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
-	init_alloc_cpu();
-
 #ifdef CONFIG_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
@@ -3261,8 +3136,10 @@ void __init kmem_cache_init(void)
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-				nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#endif
+#ifdef CONFIG_NUMA
+	kmem_size = offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *);
 #else
 	kmem_size = sizeof(struct kmem_cache);
 #endif
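The kmem_size calculation relies on the trailing array being the last member of struct kmem_cache, so the allocation can be cut off after the entries that will actually be used; the patch switches the basis of that trim from the per-cpu pointer array (now a single percpu pointer) to the NUMA node array. The same offsetof() trick in a standalone, user-space form (names hypothetical):

#include <stddef.h>
#include <stdlib.h>

#define MAX_NODES 64

struct cache_like {
	int flags;
	void *node[MAX_NODES];	/* must stay the final member */
};

static struct cache_like *alloc_cache_like(int nr_nodes)
{
	/* Allocate only up to node[nr_nodes], not the whole worst case. */
	return malloc(offsetof(struct cache_like, node) +
			nr_nodes * sizeof(void *));
}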
@@ -3351,22 +3228,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	down_write(&slub_lock);
 	s = find_mergeable(size, align, flags, name, ctor);
 	if (s) {
-		int cpu;
-
 		s->refcount++;
 		/*
 		 * Adjust the object sizes so that we clear
 		 * the complete object on kzalloc.
 		 */
 		s->objsize = max(s->objsize, (int)size);
-
-		/*
-		 * And then we need to update the object size in the
-		 * per cpu structures
-		 */
-		for_each_online_cpu(cpu)
-			get_cpu_slab(s, cpu)->objsize = s->objsize;
-
 		s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 		up_write(&slub_lock);
 
@@ -3420,29 +3287,15 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	unsigned long flags;
 
 	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		init_alloc_cpu_cpu(cpu);
-		down_read(&slub_lock);
-		list_for_each_entry(s, &slab_caches, list)
-			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
-							GFP_KERNEL);
-		up_read(&slub_lock);
-		break;
-
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
 			local_irq_save(flags);
 			__flush_cpu_slab(s, cpu);
 			local_irq_restore(flags);
-			free_kmem_cache_cpu(c, cpu);
-			s->cpu_slab[cpu] = NULL;
 		}
 		up_read(&slub_lock);
 		break;
@@ -3928,7 +3781,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		int cpu;
 
 		for_each_possible_cpu(cpu) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 			if (!c || c->node < 0)
 				continue;
@@ -4171,6 +4024,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf,
 }
 SLAB_ATTR(trace);
 
+#ifdef CONFIG_FAILSLAB
+static ssize_t failslab_show(struct kmem_cache *s, char *buf)
+{
+	return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB));
+}
+
+static ssize_t failslab_store(struct kmem_cache *s, const char *buf,
+							size_t length)
+{
+	s->flags &= ~SLAB_FAILSLAB;
+	if (buf[0] == '1')
+		s->flags |= SLAB_FAILSLAB;
+	return length;
+}
+SLAB_ATTR(failslab);
+#endif
+
 static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf)
 {
 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT));
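With both halves in place, a cache can be opted into fault injection either at boot (the new 'a' character in slub_debug sets SLAB_FAILSLAB before the cache is created) or at run time by writing 1 to the cache's new failslab attribute under /sys/kernel/slab/, which is presumably the usual way to aim failslab at a single cache on a live system.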
@@ -4353,7 +4223,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 		return -ENOMEM;
 
 	for_each_online_cpu(cpu) {
-		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
 
 		data[cpu] = x;
 		sum += x;
@@ -4371,12 +4241,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 	return len + sprintf(buf + len, "\n");
 }
 
+static void clear_stat(struct kmem_cache *s, enum stat_item si)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
+}
+
 #define STAT_ATTR(si, text) \
 static ssize_t text##_show(struct kmem_cache *s, char *buf) \
 { \
 	return show_stat(s, buf, si); \
 } \
-SLAB_ATTR_RO(text); \
+static ssize_t text##_store(struct kmem_cache *s, \
+				const char *buf, size_t length) \
+{ \
+	if (buf[0] != '0') \
+		return -EINVAL; \
+	clear_stat(s, si); \
+	return length; \
+} \
+SLAB_ATTR(text); \
 
 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
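For reference, a sketch of what one STAT_ATTR() invocation expands to after this change; SLAB_ATTR() supplies the struct slab_attribute glue and is defined elsewhere in slub.c:

static ssize_t alloc_fastpath_show(struct kmem_cache *s, char *buf)
{
	return show_stat(s, buf, ALLOC_FASTPATH);
}
static ssize_t alloc_fastpath_store(struct kmem_cache *s,
				const char *buf, size_t length)
{
	if (buf[0] != '0')
		return -EINVAL;		/* only "0" (a reset) is accepted */
	clear_stat(s, ALLOC_FASTPATH);
	return length;
}
/* SLAB_ATTR(alloc_fastpath); then wires both handlers into sysfs. */

So each statistics file, previously read-only via SLAB_ATTR_RO(), can now be cleared by writing 0 to it.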
@@ -4451,6 +4337,10 @@ static struct attribute *slab_attrs[] = {
 	&deactivate_remote_frees_attr.attr,
 	&order_fallback_attr.attr,
 #endif
+#ifdef CONFIG_FAILSLAB
+	&failslab_attr.attr,
+#endif
+
 	NULL
 };
4456 4346
@@ -4503,7 +4393,7 @@ static void kmem_cache_release(struct kobject *kobj)
4503 kfree(s); 4393 kfree(s);
4504} 4394}
4505 4395
4506static struct sysfs_ops slab_sysfs_ops = { 4396static const struct sysfs_ops slab_sysfs_ops = {
4507 .show = slab_attr_show, 4397 .show = slab_attr_show,
4508 .store = slab_attr_store, 4398 .store = slab_attr_store,
4509}; 4399};
@@ -4522,7 +4412,7 @@ static int uevent_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static struct kset_uevent_ops slab_uevent_ops = {
+static const struct kset_uevent_ops slab_uevent_ops = {
 	.filter = uevent_filter,
 };
 