diff options
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 337 |
1 files changed, 104 insertions, 233 deletions
@@ -151,7 +151,8 @@ | |||
151 | * Set of flags that will prevent slab merging | 151 | * Set of flags that will prevent slab merging |
152 | */ | 152 | */ |
153 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | 153 | #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ |
154 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) | 154 | SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ |
155 | SLAB_FAILSLAB) | ||
155 | 156 | ||
156 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ | 157 | #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ |
157 | SLAB_CACHE_DMA | SLAB_NOTRACK) | 158 | SLAB_CACHE_DMA | SLAB_NOTRACK) |
@@ -217,10 +218,10 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
217 | 218 | ||
218 | #endif | 219 | #endif |
219 | 220 | ||
220 | static inline void stat(struct kmem_cache_cpu *c, enum stat_item si) | 221 | static inline void stat(struct kmem_cache *s, enum stat_item si) |
221 | { | 222 | { |
222 | #ifdef CONFIG_SLUB_STATS | 223 | #ifdef CONFIG_SLUB_STATS |
223 | c->stat[si]++; | 224 | __this_cpu_inc(s->cpu_slab->stat[si]); |
224 | #endif | 225 | #endif |
225 | } | 226 | } |
226 | 227 | ||
@@ -242,15 +243,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) | |||
242 | #endif | 243 | #endif |
243 | } | 244 | } |
244 | 245 | ||
245 | static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu) | ||
246 | { | ||
247 | #ifdef CONFIG_SMP | ||
248 | return s->cpu_slab[cpu]; | ||
249 | #else | ||
250 | return &s->cpu_slab; | ||
251 | #endif | ||
252 | } | ||
253 | |||
254 | /* Verify that a pointer has an address that is valid within a slab page */ | 246 | /* Verify that a pointer has an address that is valid within a slab page */ |
255 | static inline int check_valid_pointer(struct kmem_cache *s, | 247 | static inline int check_valid_pointer(struct kmem_cache *s, |
256 | struct page *page, const void *object) | 248 | struct page *page, const void *object) |
@@ -269,13 +261,6 @@ static inline int check_valid_pointer(struct kmem_cache *s, | |||
269 | return 1; | 261 | return 1; |
270 | } | 262 | } |
271 | 263 | ||
272 | /* | ||
273 | * Slow version of get and set free pointer. | ||
274 | * | ||
275 | * This version requires touching the cache lines of kmem_cache which | ||
276 | * we avoid to do in the fast alloc free paths. There we obtain the offset | ||
277 | * from the page struct. | ||
278 | */ | ||
279 | static inline void *get_freepointer(struct kmem_cache *s, void *object) | 264 | static inline void *get_freepointer(struct kmem_cache *s, void *object) |
280 | { | 265 | { |
281 | return *(void **)(object + s->offset); | 266 | return *(void **)(object + s->offset); |
@@ -1020,6 +1005,9 @@ static int __init setup_slub_debug(char *str) | |||
1020 | case 't': | 1005 | case 't': |
1021 | slub_debug |= SLAB_TRACE; | 1006 | slub_debug |= SLAB_TRACE; |
1022 | break; | 1007 | break; |
1008 | case 'a': | ||
1009 | slub_debug |= SLAB_FAILSLAB; | ||
1010 | break; | ||
1023 | default: | 1011 | default: |
1024 | printk(KERN_ERR "slub_debug option '%c' " | 1012 | printk(KERN_ERR "slub_debug option '%c' " |
1025 | "unknown. skipped\n", *str); | 1013 | "unknown. skipped\n", *str); |
@@ -1124,7 +1112,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1124 | if (!page) | 1112 | if (!page) |
1125 | return NULL; | 1113 | return NULL; |
1126 | 1114 | ||
1127 | stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); | 1115 | stat(s, ORDER_FALLBACK); |
1128 | } | 1116 | } |
1129 | 1117 | ||
1130 | if (kmemcheck_enabled | 1118 | if (kmemcheck_enabled |
@@ -1422,23 +1410,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node) | |||
1422 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | 1410 | static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) |
1423 | { | 1411 | { |
1424 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); | 1412 | struct kmem_cache_node *n = get_node(s, page_to_nid(page)); |
1425 | struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id()); | ||
1426 | 1413 | ||
1427 | __ClearPageSlubFrozen(page); | 1414 | __ClearPageSlubFrozen(page); |
1428 | if (page->inuse) { | 1415 | if (page->inuse) { |
1429 | 1416 | ||
1430 | if (page->freelist) { | 1417 | if (page->freelist) { |
1431 | add_partial(n, page, tail); | 1418 | add_partial(n, page, tail); |
1432 | stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); | 1419 | stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); |
1433 | } else { | 1420 | } else { |
1434 | stat(c, DEACTIVATE_FULL); | 1421 | stat(s, DEACTIVATE_FULL); |
1435 | if (SLABDEBUG && PageSlubDebug(page) && | 1422 | if (SLABDEBUG && PageSlubDebug(page) && |
1436 | (s->flags & SLAB_STORE_USER)) | 1423 | (s->flags & SLAB_STORE_USER)) |
1437 | add_full(n, page); | 1424 | add_full(n, page); |
1438 | } | 1425 | } |
1439 | slab_unlock(page); | 1426 | slab_unlock(page); |
1440 | } else { | 1427 | } else { |
1441 | stat(c, DEACTIVATE_EMPTY); | 1428 | stat(s, DEACTIVATE_EMPTY); |
1442 | if (n->nr_partial < s->min_partial) { | 1429 | if (n->nr_partial < s->min_partial) { |
1443 | /* | 1430 | /* |
1444 | * Adding an empty slab to the partial slabs in order | 1431 | * Adding an empty slab to the partial slabs in order |
@@ -1454,7 +1441,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail) | |||
1454 | slab_unlock(page); | 1441 | slab_unlock(page); |
1455 | } else { | 1442 | } else { |
1456 | slab_unlock(page); | 1443 | slab_unlock(page); |
1457 | stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB); | 1444 | stat(s, FREE_SLAB); |
1458 | discard_slab(s, page); | 1445 | discard_slab(s, page); |
1459 | } | 1446 | } |
1460 | } | 1447 | } |
@@ -1469,7 +1456,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1469 | int tail = 1; | 1456 | int tail = 1; |
1470 | 1457 | ||
1471 | if (page->freelist) | 1458 | if (page->freelist) |
1472 | stat(c, DEACTIVATE_REMOTE_FREES); | 1459 | stat(s, DEACTIVATE_REMOTE_FREES); |
1473 | /* | 1460 | /* |
1474 | * Merge cpu freelist into slab freelist. Typically we get here | 1461 | * Merge cpu freelist into slab freelist. Typically we get here |
1475 | * because both freelists are empty. So this is unlikely | 1462 | * because both freelists are empty. So this is unlikely |
@@ -1482,10 +1469,10 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1482 | 1469 | ||
1483 | /* Retrieve object from cpu_freelist */ | 1470 | /* Retrieve object from cpu_freelist */ |
1484 | object = c->freelist; | 1471 | object = c->freelist; |
1485 | c->freelist = c->freelist[c->offset]; | 1472 | c->freelist = get_freepointer(s, c->freelist); |
1486 | 1473 | ||
1487 | /* And put onto the regular freelist */ | 1474 | /* And put onto the regular freelist */ |
1488 | object[c->offset] = page->freelist; | 1475 | set_freepointer(s, object, page->freelist); |
1489 | page->freelist = object; | 1476 | page->freelist = object; |
1490 | page->inuse--; | 1477 | page->inuse--; |
1491 | } | 1478 | } |
@@ -1495,7 +1482,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1495 | 1482 | ||
1496 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | 1483 | static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) |
1497 | { | 1484 | { |
1498 | stat(c, CPUSLAB_FLUSH); | 1485 | stat(s, CPUSLAB_FLUSH); |
1499 | slab_lock(c->page); | 1486 | slab_lock(c->page); |
1500 | deactivate_slab(s, c); | 1487 | deactivate_slab(s, c); |
1501 | } | 1488 | } |
@@ -1507,7 +1494,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) | |||
1507 | */ | 1494 | */ |
1508 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) | 1495 | static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) |
1509 | { | 1496 | { |
1510 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | 1497 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
1511 | 1498 | ||
1512 | if (likely(c && c->page)) | 1499 | if (likely(c && c->page)) |
1513 | flush_slab(s, c); | 1500 | flush_slab(s, c); |
@@ -1635,7 +1622,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node, | |||
1635 | if (unlikely(!node_match(c, node))) | 1622 | if (unlikely(!node_match(c, node))) |
1636 | goto another_slab; | 1623 | goto another_slab; |
1637 | 1624 | ||
1638 | stat(c, ALLOC_REFILL); | 1625 | stat(s, ALLOC_REFILL); |
1639 | 1626 | ||
1640 | load_freelist: | 1627 | load_freelist: |
1641 | object = c->page->freelist; | 1628 | object = c->page->freelist; |
@@ -1644,13 +1631,13 @@ load_freelist: | |||
1644 | if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) | 1631 | if (unlikely(SLABDEBUG && PageSlubDebug(c->page))) |
1645 | goto debug; | 1632 | goto debug; |
1646 | 1633 | ||
1647 | c->freelist = object[c->offset]; | 1634 | c->freelist = get_freepointer(s, object); |
1648 | c->page->inuse = c->page->objects; | 1635 | c->page->inuse = c->page->objects; |
1649 | c->page->freelist = NULL; | 1636 | c->page->freelist = NULL; |
1650 | c->node = page_to_nid(c->page); | 1637 | c->node = page_to_nid(c->page); |
1651 | unlock_out: | 1638 | unlock_out: |
1652 | slab_unlock(c->page); | 1639 | slab_unlock(c->page); |
1653 | stat(c, ALLOC_SLOWPATH); | 1640 | stat(s, ALLOC_SLOWPATH); |
1654 | return object; | 1641 | return object; |
1655 | 1642 | ||
1656 | another_slab: | 1643 | another_slab: |
@@ -1660,7 +1647,7 @@ new_slab: | |||
1660 | new = get_partial(s, gfpflags, node); | 1647 | new = get_partial(s, gfpflags, node); |
1661 | if (new) { | 1648 | if (new) { |
1662 | c->page = new; | 1649 | c->page = new; |
1663 | stat(c, ALLOC_FROM_PARTIAL); | 1650 | stat(s, ALLOC_FROM_PARTIAL); |
1664 | goto load_freelist; | 1651 | goto load_freelist; |
1665 | } | 1652 | } |
1666 | 1653 | ||
@@ -1673,8 +1660,8 @@ new_slab: | |||
1673 | local_irq_disable(); | 1660 | local_irq_disable(); |
1674 | 1661 | ||
1675 | if (new) { | 1662 | if (new) { |
1676 | c = get_cpu_slab(s, smp_processor_id()); | 1663 | c = __this_cpu_ptr(s->cpu_slab); |
1677 | stat(c, ALLOC_SLAB); | 1664 | stat(s, ALLOC_SLAB); |
1678 | if (c->page) | 1665 | if (c->page) |
1679 | flush_slab(s, c); | 1666 | flush_slab(s, c); |
1680 | slab_lock(new); | 1667 | slab_lock(new); |
@@ -1690,7 +1677,7 @@ debug: | |||
1690 | goto another_slab; | 1677 | goto another_slab; |
1691 | 1678 | ||
1692 | c->page->inuse++; | 1679 | c->page->inuse++; |
1693 | c->page->freelist = object[c->offset]; | 1680 | c->page->freelist = get_freepointer(s, object); |
1694 | c->node = -1; | 1681 | c->node = -1; |
1695 | goto unlock_out; | 1682 | goto unlock_out; |
1696 | } | 1683 | } |
@@ -1711,35 +1698,33 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
1711 | void **object; | 1698 | void **object; |
1712 | struct kmem_cache_cpu *c; | 1699 | struct kmem_cache_cpu *c; |
1713 | unsigned long flags; | 1700 | unsigned long flags; |
1714 | unsigned int objsize; | ||
1715 | 1701 | ||
1716 | gfpflags &= gfp_allowed_mask; | 1702 | gfpflags &= gfp_allowed_mask; |
1717 | 1703 | ||
1718 | lockdep_trace_alloc(gfpflags); | 1704 | lockdep_trace_alloc(gfpflags); |
1719 | might_sleep_if(gfpflags & __GFP_WAIT); | 1705 | might_sleep_if(gfpflags & __GFP_WAIT); |
1720 | 1706 | ||
1721 | if (should_failslab(s->objsize, gfpflags)) | 1707 | if (should_failslab(s->objsize, gfpflags, s->flags)) |
1722 | return NULL; | 1708 | return NULL; |
1723 | 1709 | ||
1724 | local_irq_save(flags); | 1710 | local_irq_save(flags); |
1725 | c = get_cpu_slab(s, smp_processor_id()); | 1711 | c = __this_cpu_ptr(s->cpu_slab); |
1726 | objsize = c->objsize; | 1712 | object = c->freelist; |
1727 | if (unlikely(!c->freelist || !node_match(c, node))) | 1713 | if (unlikely(!object || !node_match(c, node))) |
1728 | 1714 | ||
1729 | object = __slab_alloc(s, gfpflags, node, addr, c); | 1715 | object = __slab_alloc(s, gfpflags, node, addr, c); |
1730 | 1716 | ||
1731 | else { | 1717 | else { |
1732 | object = c->freelist; | 1718 | c->freelist = get_freepointer(s, object); |
1733 | c->freelist = object[c->offset]; | 1719 | stat(s, ALLOC_FASTPATH); |
1734 | stat(c, ALLOC_FASTPATH); | ||
1735 | } | 1720 | } |
1736 | local_irq_restore(flags); | 1721 | local_irq_restore(flags); |
1737 | 1722 | ||
1738 | if (unlikely(gfpflags & __GFP_ZERO) && object) | 1723 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
1739 | memset(object, 0, objsize); | 1724 | memset(object, 0, s->objsize); |
1740 | 1725 | ||
1741 | kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); | 1726 | kmemcheck_slab_alloc(s, gfpflags, object, s->objsize); |
1742 | kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); | 1727 | kmemleak_alloc_recursive(object, s->objsize, 1, s->flags, gfpflags); |
1743 | 1728 | ||
1744 | return object; | 1729 | return object; |
1745 | } | 1730 | } |
@@ -1794,26 +1779,25 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); | |||
1794 | * handling required then we can return immediately. | 1779 | * handling required then we can return immediately. |
1795 | */ | 1780 | */ |
1796 | static void __slab_free(struct kmem_cache *s, struct page *page, | 1781 | static void __slab_free(struct kmem_cache *s, struct page *page, |
1797 | void *x, unsigned long addr, unsigned int offset) | 1782 | void *x, unsigned long addr) |
1798 | { | 1783 | { |
1799 | void *prior; | 1784 | void *prior; |
1800 | void **object = (void *)x; | 1785 | void **object = (void *)x; |
1801 | struct kmem_cache_cpu *c; | ||
1802 | 1786 | ||
1803 | c = get_cpu_slab(s, raw_smp_processor_id()); | 1787 | stat(s, FREE_SLOWPATH); |
1804 | stat(c, FREE_SLOWPATH); | ||
1805 | slab_lock(page); | 1788 | slab_lock(page); |
1806 | 1789 | ||
1807 | if (unlikely(SLABDEBUG && PageSlubDebug(page))) | 1790 | if (unlikely(SLABDEBUG && PageSlubDebug(page))) |
1808 | goto debug; | 1791 | goto debug; |
1809 | 1792 | ||
1810 | checks_ok: | 1793 | checks_ok: |
1811 | prior = object[offset] = page->freelist; | 1794 | prior = page->freelist; |
1795 | set_freepointer(s, object, prior); | ||
1812 | page->freelist = object; | 1796 | page->freelist = object; |
1813 | page->inuse--; | 1797 | page->inuse--; |
1814 | 1798 | ||
1815 | if (unlikely(PageSlubFrozen(page))) { | 1799 | if (unlikely(PageSlubFrozen(page))) { |
1816 | stat(c, FREE_FROZEN); | 1800 | stat(s, FREE_FROZEN); |
1817 | goto out_unlock; | 1801 | goto out_unlock; |
1818 | } | 1802 | } |
1819 | 1803 | ||
@@ -1826,7 +1810,7 @@ checks_ok: | |||
1826 | */ | 1810 | */ |
1827 | if (unlikely(!prior)) { | 1811 | if (unlikely(!prior)) { |
1828 | add_partial(get_node(s, page_to_nid(page)), page, 1); | 1812 | add_partial(get_node(s, page_to_nid(page)), page, 1); |
1829 | stat(c, FREE_ADD_PARTIAL); | 1813 | stat(s, FREE_ADD_PARTIAL); |
1830 | } | 1814 | } |
1831 | 1815 | ||
1832 | out_unlock: | 1816 | out_unlock: |
@@ -1839,10 +1823,10 @@ slab_empty: | |||
1839 | * Slab still on the partial list. | 1823 | * Slab still on the partial list. |
1840 | */ | 1824 | */ |
1841 | remove_partial(s, page); | 1825 | remove_partial(s, page); |
1842 | stat(c, FREE_REMOVE_PARTIAL); | 1826 | stat(s, FREE_REMOVE_PARTIAL); |
1843 | } | 1827 | } |
1844 | slab_unlock(page); | 1828 | slab_unlock(page); |
1845 | stat(c, FREE_SLAB); | 1829 | stat(s, FREE_SLAB); |
1846 | discard_slab(s, page); | 1830 | discard_slab(s, page); |
1847 | return; | 1831 | return; |
1848 | 1832 | ||
@@ -1872,17 +1856,17 @@ static __always_inline void slab_free(struct kmem_cache *s, | |||
1872 | 1856 | ||
1873 | kmemleak_free_recursive(x, s->flags); | 1857 | kmemleak_free_recursive(x, s->flags); |
1874 | local_irq_save(flags); | 1858 | local_irq_save(flags); |
1875 | c = get_cpu_slab(s, smp_processor_id()); | 1859 | c = __this_cpu_ptr(s->cpu_slab); |
1876 | kmemcheck_slab_free(s, object, c->objsize); | 1860 | kmemcheck_slab_free(s, object, s->objsize); |
1877 | debug_check_no_locks_freed(object, c->objsize); | 1861 | debug_check_no_locks_freed(object, s->objsize); |
1878 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) | 1862 | if (!(s->flags & SLAB_DEBUG_OBJECTS)) |
1879 | debug_check_no_obj_freed(object, c->objsize); | 1863 | debug_check_no_obj_freed(object, s->objsize); |
1880 | if (likely(page == c->page && c->node >= 0)) { | 1864 | if (likely(page == c->page && c->node >= 0)) { |
1881 | object[c->offset] = c->freelist; | 1865 | set_freepointer(s, object, c->freelist); |
1882 | c->freelist = object; | 1866 | c->freelist = object; |
1883 | stat(c, FREE_FASTPATH); | 1867 | stat(s, FREE_FASTPATH); |
1884 | } else | 1868 | } else |
1885 | __slab_free(s, page, x, addr, c->offset); | 1869 | __slab_free(s, page, x, addr); |
1886 | 1870 | ||
1887 | local_irq_restore(flags); | 1871 | local_irq_restore(flags); |
1888 | } | 1872 | } |
@@ -2069,19 +2053,6 @@ static unsigned long calculate_alignment(unsigned long flags, | |||
2069 | return ALIGN(align, sizeof(void *)); | 2053 | return ALIGN(align, sizeof(void *)); |
2070 | } | 2054 | } |
2071 | 2055 | ||
2072 | static void init_kmem_cache_cpu(struct kmem_cache *s, | ||
2073 | struct kmem_cache_cpu *c) | ||
2074 | { | ||
2075 | c->page = NULL; | ||
2076 | c->freelist = NULL; | ||
2077 | c->node = 0; | ||
2078 | c->offset = s->offset / sizeof(void *); | ||
2079 | c->objsize = s->objsize; | ||
2080 | #ifdef CONFIG_SLUB_STATS | ||
2081 | memset(c->stat, 0, NR_SLUB_STAT_ITEMS * sizeof(unsigned)); | ||
2082 | #endif | ||
2083 | } | ||
2084 | |||
2085 | static void | 2056 | static void |
2086 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | 2057 | init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) |
2087 | { | 2058 | { |
@@ -2095,130 +2066,24 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s) | |||
2095 | #endif | 2066 | #endif |
2096 | } | 2067 | } |
2097 | 2068 | ||
2098 | #ifdef CONFIG_SMP | 2069 | static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[KMALLOC_CACHES]); |
2099 | /* | ||
2100 | * Per cpu array for per cpu structures. | ||
2101 | * | ||
2102 | * The per cpu array places all kmem_cache_cpu structures from one processor | ||
2103 | * close together meaning that it becomes possible that multiple per cpu | ||
2104 | * structures are contained in one cacheline. This may be particularly | ||
2105 | * beneficial for the kmalloc caches. | ||
2106 | * | ||
2107 | * A desktop system typically has around 60-80 slabs. With 100 here we are | ||
2108 | * likely able to get per cpu structures for all caches from the array defined | ||
2109 | * here. We must be able to cover all kmalloc caches during bootstrap. | ||
2110 | * | ||
2111 | * If the per cpu array is exhausted then fall back to kmalloc | ||
2112 | * of individual cachelines. No sharing is possible then. | ||
2113 | */ | ||
2114 | #define NR_KMEM_CACHE_CPU 100 | ||
2115 | |||
2116 | static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU], | ||
2117 | kmem_cache_cpu); | ||
2118 | |||
2119 | static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free); | ||
2120 | static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS); | ||
2121 | |||
2122 | static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s, | ||
2123 | int cpu, gfp_t flags) | ||
2124 | { | ||
2125 | struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu); | ||
2126 | |||
2127 | if (c) | ||
2128 | per_cpu(kmem_cache_cpu_free, cpu) = | ||
2129 | (void *)c->freelist; | ||
2130 | else { | ||
2131 | /* Table overflow: So allocate ourselves */ | ||
2132 | c = kmalloc_node( | ||
2133 | ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()), | ||
2134 | flags, cpu_to_node(cpu)); | ||
2135 | if (!c) | ||
2136 | return NULL; | ||
2137 | } | ||
2138 | |||
2139 | init_kmem_cache_cpu(s, c); | ||
2140 | return c; | ||
2141 | } | ||
2142 | |||
2143 | static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu) | ||
2144 | { | ||
2145 | if (c < per_cpu(kmem_cache_cpu, cpu) || | ||
2146 | c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) { | ||
2147 | kfree(c); | ||
2148 | return; | ||
2149 | } | ||
2150 | c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu); | ||
2151 | per_cpu(kmem_cache_cpu_free, cpu) = c; | ||
2152 | } | ||
2153 | |||
2154 | static void free_kmem_cache_cpus(struct kmem_cache *s) | ||
2155 | { | ||
2156 | int cpu; | ||
2157 | |||
2158 | for_each_online_cpu(cpu) { | ||
2159 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
2160 | |||
2161 | if (c) { | ||
2162 | s->cpu_slab[cpu] = NULL; | ||
2163 | free_kmem_cache_cpu(c, cpu); | ||
2164 | } | ||
2165 | } | ||
2166 | } | ||
2167 | |||
2168 | static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
2169 | { | ||
2170 | int cpu; | ||
2171 | |||
2172 | for_each_online_cpu(cpu) { | ||
2173 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
2174 | |||
2175 | if (c) | ||
2176 | continue; | ||
2177 | |||
2178 | c = alloc_kmem_cache_cpu(s, cpu, flags); | ||
2179 | if (!c) { | ||
2180 | free_kmem_cache_cpus(s); | ||
2181 | return 0; | ||
2182 | } | ||
2183 | s->cpu_slab[cpu] = c; | ||
2184 | } | ||
2185 | return 1; | ||
2186 | } | ||
2187 | |||
2188 | /* | ||
2189 | * Initialize the per cpu array. | ||
2190 | */ | ||
2191 | static void init_alloc_cpu_cpu(int cpu) | ||
2192 | { | ||
2193 | int i; | ||
2194 | 2070 | ||
2195 | if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once))) | 2071 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) |
2196 | return; | ||
2197 | |||
2198 | for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--) | ||
2199 | free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu); | ||
2200 | |||
2201 | cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)); | ||
2202 | } | ||
2203 | |||
2204 | static void __init init_alloc_cpu(void) | ||
2205 | { | 2072 | { |
2206 | int cpu; | 2073 | if (s < kmalloc_caches + KMALLOC_CACHES && s >= kmalloc_caches) |
2207 | 2074 | /* | |
2208 | for_each_online_cpu(cpu) | 2075 | * Boot time creation of the kmalloc array. Use static per cpu data |
2209 | init_alloc_cpu_cpu(cpu); | 2076 | * since the per cpu allocator is not available yet. |
2210 | } | 2077 | */ |
2078 | s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches); | ||
2079 | else | ||
2080 | s->cpu_slab = alloc_percpu(struct kmem_cache_cpu); | ||
2211 | 2081 | ||
2212 | #else | 2082 | if (!s->cpu_slab) |
2213 | static inline void free_kmem_cache_cpus(struct kmem_cache *s) {} | 2083 | return 0; |
2214 | static inline void init_alloc_cpu(void) {} | ||
2215 | 2084 | ||
2216 | static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags) | ||
2217 | { | ||
2218 | init_kmem_cache_cpu(s, &s->cpu_slab); | ||
2219 | return 1; | 2085 | return 1; |
2220 | } | 2086 | } |
2221 | #endif | ||
2222 | 2087 | ||
2223 | #ifdef CONFIG_NUMA | 2088 | #ifdef CONFIG_NUMA |
2224 | /* | 2089 | /* |
@@ -2287,7 +2152,8 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags) | |||
2287 | int node; | 2152 | int node; |
2288 | int local_node; | 2153 | int local_node; |
2289 | 2154 | ||
2290 | if (slab_state >= UP) | 2155 | if (slab_state >= UP && (s < kmalloc_caches || |
2156 | s > kmalloc_caches + KMALLOC_CACHES)) | ||
2291 | local_node = page_to_nid(virt_to_page(s)); | 2157 | local_node = page_to_nid(virt_to_page(s)); |
2292 | else | 2158 | else |
2293 | local_node = 0; | 2159 | local_node = 0; |
@@ -2502,6 +2368,7 @@ static int kmem_cache_open(struct kmem_cache *s, gfp_t gfpflags, | |||
2502 | 2368 | ||
2503 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) | 2369 | if (alloc_kmem_cache_cpus(s, gfpflags & ~SLUB_DMA)) |
2504 | return 1; | 2370 | return 1; |
2371 | |||
2505 | free_kmem_cache_nodes(s); | 2372 | free_kmem_cache_nodes(s); |
2506 | error: | 2373 | error: |
2507 | if (flags & SLAB_PANIC) | 2374 | if (flags & SLAB_PANIC) |
@@ -2609,9 +2476,8 @@ static inline int kmem_cache_close(struct kmem_cache *s) | |||
2609 | int node; | 2476 | int node; |
2610 | 2477 | ||
2611 | flush_all(s); | 2478 | flush_all(s); |
2612 | 2479 | free_percpu(s->cpu_slab); | |
2613 | /* Attempt to free all objects */ | 2480 | /* Attempt to free all objects */ |
2614 | free_kmem_cache_cpus(s); | ||
2615 | for_each_node_state(node, N_NORMAL_MEMORY) { | 2481 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2616 | struct kmem_cache_node *n = get_node(s, node); | 2482 | struct kmem_cache_node *n = get_node(s, node); |
2617 | 2483 | ||
@@ -2651,7 +2517,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2651 | * Kmalloc subsystem | 2517 | * Kmalloc subsystem |
2652 | *******************************************************************/ | 2518 | *******************************************************************/ |
2653 | 2519 | ||
2654 | struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; | 2520 | struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; |
2655 | EXPORT_SYMBOL(kmalloc_caches); | 2521 | EXPORT_SYMBOL(kmalloc_caches); |
2656 | 2522 | ||
2657 | static int __init setup_slub_min_order(char *str) | 2523 | static int __init setup_slub_min_order(char *str) |
@@ -2741,6 +2607,7 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2741 | char *text; | 2607 | char *text; |
2742 | size_t realsize; | 2608 | size_t realsize; |
2743 | unsigned long slabflags; | 2609 | unsigned long slabflags; |
2610 | int i; | ||
2744 | 2611 | ||
2745 | s = kmalloc_caches_dma[index]; | 2612 | s = kmalloc_caches_dma[index]; |
2746 | if (s) | 2613 | if (s) |
@@ -2760,7 +2627,14 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2760 | realsize = kmalloc_caches[index].objsize; | 2627 | realsize = kmalloc_caches[index].objsize; |
2761 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", | 2628 | text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", |
2762 | (unsigned int)realsize); | 2629 | (unsigned int)realsize); |
2763 | s = kmalloc(kmem_size, flags & ~SLUB_DMA); | 2630 | |
2631 | s = NULL; | ||
2632 | for (i = 0; i < KMALLOC_CACHES; i++) | ||
2633 | if (!kmalloc_caches[i].size) | ||
2634 | break; | ||
2635 | |||
2636 | BUG_ON(i >= KMALLOC_CACHES); | ||
2637 | s = kmalloc_caches + i; | ||
2764 | 2638 | ||
2765 | /* | 2639 | /* |
2766 | * Must defer sysfs creation to a workqueue because we don't know | 2640 | * Must defer sysfs creation to a workqueue because we don't know |
@@ -2772,9 +2646,9 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags) | |||
2772 | if (slab_state >= SYSFS) | 2646 | if (slab_state >= SYSFS) |
2773 | slabflags |= __SYSFS_ADD_DEFERRED; | 2647 | slabflags |= __SYSFS_ADD_DEFERRED; |
2774 | 2648 | ||
2775 | if (!s || !text || !kmem_cache_open(s, flags, text, | 2649 | if (!text || !kmem_cache_open(s, flags, text, |
2776 | realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { | 2650 | realsize, ARCH_KMALLOC_MINALIGN, slabflags, NULL)) { |
2777 | kfree(s); | 2651 | s->size = 0; |
2778 | kfree(text); | 2652 | kfree(text); |
2779 | goto unlock_out; | 2653 | goto unlock_out; |
2780 | } | 2654 | } |
@@ -3176,8 +3050,6 @@ void __init kmem_cache_init(void) | |||
3176 | int i; | 3050 | int i; |
3177 | int caches = 0; | 3051 | int caches = 0; |
3178 | 3052 | ||
3179 | init_alloc_cpu(); | ||
3180 | |||
3181 | #ifdef CONFIG_NUMA | 3053 | #ifdef CONFIG_NUMA |
3182 | /* | 3054 | /* |
3183 | * Must first have the slab cache available for the allocations of the | 3055 | * Must first have the slab cache available for the allocations of the |
@@ -3261,8 +3133,10 @@ void __init kmem_cache_init(void) | |||
3261 | 3133 | ||
3262 | #ifdef CONFIG_SMP | 3134 | #ifdef CONFIG_SMP |
3263 | register_cpu_notifier(&slab_notifier); | 3135 | register_cpu_notifier(&slab_notifier); |
3264 | kmem_size = offsetof(struct kmem_cache, cpu_slab) + | 3136 | #endif |
3265 | nr_cpu_ids * sizeof(struct kmem_cache_cpu *); | 3137 | #ifdef CONFIG_NUMA |
3138 | kmem_size = offsetof(struct kmem_cache, node) + | ||
3139 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
3266 | #else | 3140 | #else |
3267 | kmem_size = sizeof(struct kmem_cache); | 3141 | kmem_size = sizeof(struct kmem_cache); |
3268 | #endif | 3142 | #endif |
@@ -3351,22 +3225,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, | |||
3351 | down_write(&slub_lock); | 3225 | down_write(&slub_lock); |
3352 | s = find_mergeable(size, align, flags, name, ctor); | 3226 | s = find_mergeable(size, align, flags, name, ctor); |
3353 | if (s) { | 3227 | if (s) { |
3354 | int cpu; | ||
3355 | |||
3356 | s->refcount++; | 3228 | s->refcount++; |
3357 | /* | 3229 | /* |
3358 | * Adjust the object sizes so that we clear | 3230 | * Adjust the object sizes so that we clear |
3359 | * the complete object on kzalloc. | 3231 | * the complete object on kzalloc. |
3360 | */ | 3232 | */ |
3361 | s->objsize = max(s->objsize, (int)size); | 3233 | s->objsize = max(s->objsize, (int)size); |
3362 | |||
3363 | /* | ||
3364 | * And then we need to update the object size in the | ||
3365 | * per cpu structures | ||
3366 | */ | ||
3367 | for_each_online_cpu(cpu) | ||
3368 | get_cpu_slab(s, cpu)->objsize = s->objsize; | ||
3369 | |||
3370 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); | 3234 | s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); |
3371 | up_write(&slub_lock); | 3235 | up_write(&slub_lock); |
3372 | 3236 | ||
@@ -3420,29 +3284,15 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, | |||
3420 | unsigned long flags; | 3284 | unsigned long flags; |
3421 | 3285 | ||
3422 | switch (action) { | 3286 | switch (action) { |
3423 | case CPU_UP_PREPARE: | ||
3424 | case CPU_UP_PREPARE_FROZEN: | ||
3425 | init_alloc_cpu_cpu(cpu); | ||
3426 | down_read(&slub_lock); | ||
3427 | list_for_each_entry(s, &slab_caches, list) | ||
3428 | s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu, | ||
3429 | GFP_KERNEL); | ||
3430 | up_read(&slub_lock); | ||
3431 | break; | ||
3432 | |||
3433 | case CPU_UP_CANCELED: | 3287 | case CPU_UP_CANCELED: |
3434 | case CPU_UP_CANCELED_FROZEN: | 3288 | case CPU_UP_CANCELED_FROZEN: |
3435 | case CPU_DEAD: | 3289 | case CPU_DEAD: |
3436 | case CPU_DEAD_FROZEN: | 3290 | case CPU_DEAD_FROZEN: |
3437 | down_read(&slub_lock); | 3291 | down_read(&slub_lock); |
3438 | list_for_each_entry(s, &slab_caches, list) { | 3292 | list_for_each_entry(s, &slab_caches, list) { |
3439 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | ||
3440 | |||
3441 | local_irq_save(flags); | 3293 | local_irq_save(flags); |
3442 | __flush_cpu_slab(s, cpu); | 3294 | __flush_cpu_slab(s, cpu); |
3443 | local_irq_restore(flags); | 3295 | local_irq_restore(flags); |
3444 | free_kmem_cache_cpu(c, cpu); | ||
3445 | s->cpu_slab[cpu] = NULL; | ||
3446 | } | 3296 | } |
3447 | up_read(&slub_lock); | 3297 | up_read(&slub_lock); |
3448 | break; | 3298 | break; |
@@ -3928,7 +3778,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, | |||
3928 | int cpu; | 3778 | int cpu; |
3929 | 3779 | ||
3930 | for_each_possible_cpu(cpu) { | 3780 | for_each_possible_cpu(cpu) { |
3931 | struct kmem_cache_cpu *c = get_cpu_slab(s, cpu); | 3781 | struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu); |
3932 | 3782 | ||
3933 | if (!c || c->node < 0) | 3783 | if (!c || c->node < 0) |
3934 | continue; | 3784 | continue; |
@@ -4171,6 +4021,23 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf, | |||
4171 | } | 4021 | } |
4172 | SLAB_ATTR(trace); | 4022 | SLAB_ATTR(trace); |
4173 | 4023 | ||
4024 | #ifdef CONFIG_FAILSLAB | ||
4025 | static ssize_t failslab_show(struct kmem_cache *s, char *buf) | ||
4026 | { | ||
4027 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_FAILSLAB)); | ||
4028 | } | ||
4029 | |||
4030 | static ssize_t failslab_store(struct kmem_cache *s, const char *buf, | ||
4031 | size_t length) | ||
4032 | { | ||
4033 | s->flags &= ~SLAB_FAILSLAB; | ||
4034 | if (buf[0] == '1') | ||
4035 | s->flags |= SLAB_FAILSLAB; | ||
4036 | return length; | ||
4037 | } | ||
4038 | SLAB_ATTR(failslab); | ||
4039 | #endif | ||
4040 | |||
4174 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) | 4041 | static ssize_t reclaim_account_show(struct kmem_cache *s, char *buf) |
4175 | { | 4042 | { |
4176 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); | 4043 | return sprintf(buf, "%d\n", !!(s->flags & SLAB_RECLAIM_ACCOUNT)); |
@@ -4353,7 +4220,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | |||
4353 | return -ENOMEM; | 4220 | return -ENOMEM; |
4354 | 4221 | ||
4355 | for_each_online_cpu(cpu) { | 4222 | for_each_online_cpu(cpu) { |
4356 | unsigned x = get_cpu_slab(s, cpu)->stat[si]; | 4223 | unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si]; |
4357 | 4224 | ||
4358 | data[cpu] = x; | 4225 | data[cpu] = x; |
4359 | sum += x; | 4226 | sum += x; |
@@ -4376,7 +4243,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si) | |||
4376 | int cpu; | 4243 | int cpu; |
4377 | 4244 | ||
4378 | for_each_online_cpu(cpu) | 4245 | for_each_online_cpu(cpu) |
4379 | get_cpu_slab(s, cpu)->stat[si] = 0; | 4246 | per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0; |
4380 | } | 4247 | } |
4381 | 4248 | ||
4382 | #define STAT_ATTR(si, text) \ | 4249 | #define STAT_ATTR(si, text) \ |
@@ -4467,6 +4334,10 @@ static struct attribute *slab_attrs[] = { | |||
4467 | &deactivate_remote_frees_attr.attr, | 4334 | &deactivate_remote_frees_attr.attr, |
4468 | &order_fallback_attr.attr, | 4335 | &order_fallback_attr.attr, |
4469 | #endif | 4336 | #endif |
4337 | #ifdef CONFIG_FAILSLAB | ||
4338 | &failslab_attr.attr, | ||
4339 | #endif | ||
4340 | |||
4470 | NULL | 4341 | NULL |
4471 | }; | 4342 | }; |
4472 | 4343 | ||