Diffstat (limited to 'mm/slub.c')
 -rw-r--r--  mm/slub.c  175
 1 file changed, 62 insertions, 113 deletions
diff --git a/mm/slub.c b/mm/slub.c
index e2989ae243b5..74c65af0a54f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,13 +149,6 @@ static inline void ClearSlabDebug(struct page *page)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
-/*
- * Currently fastpath is not supported if preemption is enabled.
- */
-#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
-#define SLUB_FASTPATH
-#endif
-
 #if PAGE_SHIFT <= 12
 
 /*
@@ -211,6 +204,8 @@ static inline void ClearSlabDebug(struct page *page)
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
+#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */
+#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */
 
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -308,7 +303,7 @@ static inline int is_end(void *addr)
         return (unsigned long)addr & PAGE_MAPPING_ANON;
 }
 
-void *slab_address(struct page *page)
+static void *slab_address(struct page *page)
 {
         return page->end - PAGE_MAPPING_ANON;
 }
@@ -1078,14 +1073,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
         struct page *page;
         int pages = 1 << s->order;
 
-        if (s->order)
-                flags |= __GFP_COMP;
-
-        if (s->flags & SLAB_CACHE_DMA)
-                flags |= SLUB_DMA;
-
-        if (s->flags & SLAB_RECLAIM_ACCOUNT)
-                flags |= __GFP_RECLAIMABLE;
+        flags |= s->allocflags;
 
         if (node == -1)
                 page = alloc_pages(flags, s->order);
@@ -1519,11 +1507,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 {
         void **object;
         struct page *new;
-#ifdef SLUB_FASTPATH
-        unsigned long flags;
 
-        local_irq_save(flags);
-#endif
         if (!c->page)
                 goto new_slab;
 
@@ -1546,10 +1530,6 @@ load_freelist:
 unlock_out:
         slab_unlock(c->page);
         stat(c, ALLOC_SLOWPATH);
-out:
-#ifdef SLUB_FASTPATH
-        local_irq_restore(flags);
-#endif
         return object;
 
 another_slab:
@@ -1581,8 +1561,22 @@ new_slab:
                 c->page = new;
                 goto load_freelist;
         }
-        object = NULL;
-        goto out;
+
+        /*
+         * No memory available.
+         *
+         * If the slab uses higher order allocs but the object is
+         * smaller than a page size then we can fallback in emergencies
+         * to the page allocator via kmalloc_large. The page allocator may
+         * have failed to obtain a higher order page and we can try to
+         * allocate a single page if the object fits into a single page.
+         * That is only possible if certain conditions are met that are being
+         * checked when a slab is created.
+         */
+        if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
+                return kmalloc_large(s->objsize, gfpflags);
+
+        return NULL;
 debug:
         object = c->page->freelist;
         if (!alloc_debug_processing(s, c->page, object, addr))
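
The comment added to __slab_alloc() above describes the new emergency path: when a fresh slab page cannot be obtained, a cache marked with __PAGE_ALLOC_FALLBACK may satisfy the request straight from the page allocator via kmalloc_large(), unless the caller passed __GFP_NORETRY. The standalone C sketch below models only that decision; it is not kernel code, and the flag values and helper names are stand-ins chosen for illustration.

#include <stdio.h>
#include <stdlib.h>

#define GFP_NORETRY          0x1   /* stand-in for __GFP_NORETRY */
#define PAGE_ALLOC_FALLBACK  0x2   /* stand-in for __PAGE_ALLOC_FALLBACK */

struct cache_model {
        unsigned long flags;    /* cache flags set at creation time */
        size_t objsize;         /* object size served by this cache */
};

/* stand-in for kmalloc_large(): one object straight from the page allocator */
static void *page_alloc_fallback(size_t size, unsigned int gfpflags)
{
        (void)gfpflags;
        return malloc(size);
}

/* models the tail of __slab_alloc() after this patch */
static void *alloc_slowpath(struct cache_model *s, unsigned int gfpflags,
                            int got_slab_page)
{
        if (got_slab_page)
                return malloc(s->objsize);  /* normal path: object from a new slab */

        /* new emergency path: page allocator fallback for kmalloc caches */
        if (!(gfpflags & GFP_NORETRY) && (s->flags & PAGE_ALLOC_FALLBACK))
                return page_alloc_fallback(s->objsize, gfpflags);

        return NULL;
}

int main(void)
{
        struct cache_model s = { .flags = PAGE_ALLOC_FALLBACK, .objsize = 256 };
        void *p = alloc_slowpath(&s, 0, 0 /* simulate slab page allocation failure */);

        printf("fallback object: %p\n", p);
        free(p);
        return 0;
}
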
@@ -1609,34 +1603,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
         void **object;
         struct kmem_cache_cpu *c;
-
-/*
- * The SLUB_FASTPATH path is provisional and is currently disabled if the
- * kernel is compiled with preemption or if the arch does not support
- * fast cmpxchg operations. There are a couple of coming changes that will
- * simplify matters and allow preemption. Ultimately we may end up making
- * SLUB_FASTPATH the default.
- *
- * 1. The introduction of the per cpu allocator will avoid array lookups
- *    through get_cpu_slab(). A special register can be used instead.
- *
- * 2. The introduction of per cpu atomic operations (cpu_ops) means that
- *    we can realize the logic here entirely with per cpu atomics. The
- *    per cpu atomic ops will take care of the preemption issues.
- */
-
-#ifdef SLUB_FASTPATH
-        c = get_cpu_slab(s, raw_smp_processor_id());
-        do {
-                object = c->freelist;
-                if (unlikely(is_end(object) || !node_match(c, node))) {
-                        object = __slab_alloc(s, gfpflags, node, addr, c);
-                        break;
-                }
-                stat(c, ALLOC_FASTPATH);
-        } while (cmpxchg_local(&c->freelist, object, object[c->offset])
-                                        != object);
-#else
         unsigned long flags;
 
         local_irq_save(flags);
@@ -1651,7 +1617,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
                 stat(c, ALLOC_FASTPATH);
         }
         local_irq_restore(flags);
-#endif
 
         if (unlikely((gfpflags & __GFP_ZERO) && object))
                 memset(object, 0, c->objsize);
@@ -1688,11 +1653,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
 
-#ifdef SLUB_FASTPATH
-        unsigned long flags;
-
-        local_irq_save(flags);
-#endif
         c = get_cpu_slab(s, raw_smp_processor_id());
         stat(c, FREE_SLOWPATH);
         slab_lock(page);
@@ -1724,9 +1684,6 @@ checks_ok:
 
 out_unlock:
         slab_unlock(page);
-#ifdef SLUB_FASTPATH
-        local_irq_restore(flags);
-#endif
         return;
 
 slab_empty:
@@ -1739,9 +1696,6 @@ slab_empty:
         }
         slab_unlock(page);
         stat(c, FREE_SLAB);
-#ifdef SLUB_FASTPATH
-        local_irq_restore(flags);
-#endif
         discard_slab(s, page);
         return;
 
@@ -1767,34 +1721,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
         void **object = (void *)x;
         struct kmem_cache_cpu *c;
-
-#ifdef SLUB_FASTPATH
-        void **freelist;
-
-        c = get_cpu_slab(s, raw_smp_processor_id());
-        debug_check_no_locks_freed(object, s->objsize);
-        do {
-                freelist = c->freelist;
-                barrier();
-                /*
-                 * If the compiler would reorder the retrieval of c->page to
-                 * come before c->freelist then an interrupt could
-                 * change the cpu slab before we retrieve c->freelist. We
-                 * could be matching on a page no longer active and put the
-                 * object onto the freelist of the wrong slab.
-                 *
-                 * On the other hand: If we already have the freelist pointer
-                 * then any change of cpu_slab will cause the cmpxchg to fail
-                 * since the freelist pointers are unique per slab.
-                 */
-                if (unlikely(page != c->page || c->node < 0)) {
-                        __slab_free(s, page, x, addr, c->offset);
-                        break;
-                }
-                object[c->offset] = freelist;
-                stat(c, FREE_FASTPATH);
-        } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
-#else
         unsigned long flags;
 
         local_irq_save(flags);
@@ -1808,7 +1734,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
                 __slab_free(s, page, x, addr, c->offset);
 
         local_irq_restore(flags);
-#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2329,10 +2254,33 @@ static int calculate_sizes(struct kmem_cache *s)
         size = ALIGN(size, align);
         s->size = size;
 
-        s->order = calculate_order(size);
+        if ((flags & __KMALLOC_CACHE) &&
+                        PAGE_SIZE / size < slub_min_objects) {
+                /*
+                 * Kmalloc cache that would not have enough objects in
+                 * an order 0 page. Kmalloc slabs can fallback to
+                 * page allocator order 0 allocs so take a reasonably large
+                 * order that will allow us a good number of objects.
+                 */
+                s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+                s->flags |= __PAGE_ALLOC_FALLBACK;
+                s->allocflags |= __GFP_NOWARN;
+        } else
+                s->order = calculate_order(size);
+
         if (s->order < 0)
                 return 0;
 
+        s->allocflags = 0;
+        if (s->order)
+                s->allocflags |= __GFP_COMP;
+
+        if (s->flags & SLAB_CACHE_DMA)
+                s->allocflags |= SLUB_DMA;
+
+        if (s->flags & SLAB_RECLAIM_ACCOUNT)
+                s->allocflags |= __GFP_RECLAIMABLE;
+
         /*
          * Determine the number of objects per slab
          */
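
The calculate_sizes() hunk above moves the GFP flag computation out of allocate_slab(): the per-allocation checks that previously built up `flags` are now folded once into s->allocflags at cache creation time, and allocate_slab() simply ORs them in. Below is a standalone C sketch of that precomputation, with illustrative flag values rather than the real kernel constants.

#include <stdio.h>

#define GFP_COMP         0x1   /* stand-in for __GFP_COMP */
#define GFP_DMA          0x2   /* stand-in for SLUB_DMA */
#define GFP_RECLAIMABLE  0x4   /* stand-in for __GFP_RECLAIMABLE */

#define SLAB_CACHE_DMA        0x10
#define SLAB_RECLAIM_ACCOUNT  0x20

struct cache_model {
        unsigned long flags;       /* cache creation flags */
        int order;                 /* page order chosen for one slab */
        unsigned long allocflags;  /* precomputed GFP bits for allocate_slab() */
};

/* models the allocflags setup added to calculate_sizes() */
static void compute_allocflags(struct cache_model *s)
{
        s->allocflags = 0;
        if (s->order)                          /* order > 0 needs a compound page */
                s->allocflags |= GFP_COMP;
        if (s->flags & SLAB_CACHE_DMA)
                s->allocflags |= GFP_DMA;
        if (s->flags & SLAB_RECLAIM_ACCOUNT)
                s->allocflags |= GFP_RECLAIMABLE;
}

int main(void)
{
        struct cache_model s = { .flags = SLAB_CACHE_DMA, .order = 2 };

        compute_allocflags(&s);
        /* allocate_slab() now just does: flags |= s->allocflags; */
        printf("allocflags = %#lx\n", s.allocflags);   /* 0x3: compound + DMA */
        return 0;
}
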
@@ -2484,11 +2432,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  * Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2536,7 +2484,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 
         down_write(&slub_lock);
         if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
-                        flags, NULL))
+                        flags | __KMALLOC_CACHE, NULL))
                 goto panic;
 
         list_add(&s->list, &slab_caches);
@@ -2670,9 +2618,8 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
-                return (void *)__get_free_pages(flags | __GFP_COMP,
-                                                get_order(size));
+        if (unlikely(size > PAGE_SIZE))
+                return kmalloc_large(size, flags);
 
         s = get_slab(size, flags);
 
@@ -2688,9 +2635,8 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
-                return (void *)__get_free_pages(flags | __GFP_COMP,
-                                                get_order(size));
+        if (unlikely(size > PAGE_SIZE))
+                return kmalloc_large(size, flags);
 
         s = get_slab(size, flags);
 
@@ -3001,7 +2947,7 @@ void __init kmem_cache_init(void)
                 caches++;
         }
 
-        for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+        for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
                 create_kmalloc_cache(&kmalloc_caches[i],
                         "kmalloc", 1 << i, GFP_KERNEL);
                 caches++;
@@ -3028,7 +2974,7 @@ void __init kmem_cache_init(void)
         slab_state = UP;
 
         /* Provide the correct kmalloc names now that the caches are up */
-        for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+        for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
                 kmalloc_caches[i]. name =
                         kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
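
The array and loop changes above follow from the new PAGE_SIZE cutoff: kmalloc caches are now created for every power of two from 2^KMALLOC_SHIFT_LOW up to and including 2^PAGE_SHIFT (= PAGE_SIZE), so kmalloc_caches[] needs PAGE_SHIFT + 1 slots and the loops run to i <= PAGE_SHIFT. A small standalone model of that sizing follows; the 4 KiB page and the smallest cache size of 8 bytes are assumptions for illustration only.

#include <stdio.h>

#define MODEL_PAGE_SHIFT        12   /* assumed: 4 KiB pages */
#define MODEL_KMALLOC_SHIFT_LOW 3    /* assumed: smallest kmalloc cache is 8 bytes */

/* cache index i serves objects of size 1 << i, hence the + 1 */
static const char *cache_names[MODEL_PAGE_SHIFT + 1];

int main(void)
{
        static char names[MODEL_PAGE_SHIFT + 1][32];
        int i;

        for (i = MODEL_KMALLOC_SHIFT_LOW; i <= MODEL_PAGE_SHIFT; i++) {
                snprintf(names[i], sizeof(names[i]), "kmalloc-%d", 1 << i);
                cache_names[i] = names[i];
                printf("index %2d -> %s\n", i, cache_names[i]);
        }
        /* The last cache created is kmalloc-4096 in this model: one object
         * per page, which the new PAGE_SIZE cutoff in __kmalloc() relies on. */
        return 0;
}
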
@@ -3057,6 +3003,9 @@ static int slab_unmergeable(struct kmem_cache *s)
         if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
                 return 1;
 
+        if ((s->flags & __PAGE_ALLOC_FALLBACK))
+                return 1;
+
         if (s->ctor)
                 return 1;
 
@@ -3218,9 +3167,9 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
-                return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-                                                get_order(size));
+        if (unlikely(size > PAGE_SIZE))
+                return kmalloc_large(size, gfpflags);
+
         s = get_slab(size, gfpflags);
 
         if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3234,9 +3183,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
         struct kmem_cache *s;
 
-        if (unlikely(size > PAGE_SIZE / 2))
-                return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-                                                get_order(size));
+        if (unlikely(size > PAGE_SIZE))
+                return kmalloc_large(size, gfpflags);
+
         s = get_slab(size, gfpflags);
 
         if (unlikely(ZERO_OR_NULL_PTR(s)))
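
Finally, the __kmalloc(), __kmalloc_node() and *_track_caller() hunks raise the slab cutoff from PAGE_SIZE / 2 to PAGE_SIZE and route anything larger through kmalloc_large() instead of calling __get_free_pages() inline. A rough userspace model of the new routing follows; the 4 KiB page size and the helper names are assumptions for illustration, not the kernel interfaces.

#include <stdio.h>
#include <stdlib.h>

#define MODEL_PAGE_SIZE 4096u   /* assumed page size */

static void *from_slab_cache(size_t size)
{
        /* stand-in for get_slab() + slab_alloc() */
        return malloc(size);
}

static void *from_page_allocator(size_t size)
{
        /* stand-in for kmalloc_large() -> page allocator */
        return malloc(size);
}

static void *model_kmalloc(size_t size)
{
        if (size > MODEL_PAGE_SIZE)          /* was "> PAGE_SIZE / 2" before this patch */
                return from_page_allocator(size);
        return from_slab_cache(size);
}

int main(void)
{
        void *small = model_kmalloc(512);    /* served by a kmalloc slab cache */
        void *big   = model_kmalloc(8192);   /* routed to the page allocator */

        printf("small=%p big=%p\n", small, big);
        free(small);
        free(big);
        return 0;
}
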