Diffstat (limited to 'mm/slub.c')
-rw-r--r--   mm/slub.c   175
1 files changed, 62 insertions, 113 deletions
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,13 +149,6 @@ static inline void ClearSlabDebug(struct page *page)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
-/*
- * Currently fastpath is not supported if preemption is enabled.
- */
-#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
-#define SLUB_FASTPATH
-#endif
-
 #if PAGE_SHIFT <= 12
 
 /*
@@ -211,6 +204,8 @@ static inline void ClearSlabDebug(struct page *page)
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
+#define __KMALLOC_CACHE 0x20000000 /* objects freed using kfree */
+#define __PAGE_ALLOC_FALLBACK 0x10000000 /* Allow fallback to page alloc */
 
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -308,7 +303,7 @@ static inline int is_end(void *addr)
        return (unsigned long)addr & PAGE_MAPPING_ANON;
 }
 
-void *slab_address(struct page *page)
+static void *slab_address(struct page *page)
 {
        return page->end - PAGE_MAPPING_ANON;
 }
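The is_end()/slab_address() pair in the hunk above relies on SLUB's trick of encoding the freelist end marker by setting a tag bit (PAGE_MAPPING_ANON) in page->end, so one pointer serves both as the sentinel and as a way back to the slab's base address. The standalone userspace sketch below illustrates only that tagging idea; the names, the 0x1 tag value, and the buffer are invented for illustration and are not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

#define END_TAG 0x1UL   /* stands in for PAGE_MAPPING_ANON */

/* Build the tagged end-of-freelist marker from the slab base address.
 * The base is assumed to be aligned, so OR-ing the tag equals adding it. */
static void *make_end(void *base)
{
        return (void *)((uintptr_t)base | END_TAG);
}

/* True if this freelist pointer is the end marker rather than an object. */
static int ptr_is_end(void *p)
{
        return ((uintptr_t)p & END_TAG) != 0;
}

/* Recover the base address, mirroring the "page->end - PAGE_MAPPING_ANON" shape. */
static void *base_of(void *end)
{
        return (void *)((uintptr_t)end - END_TAG);
}

int main(void)
{
        static long slab[8];    /* stands in for the slab's memory */
        void *end = make_end(slab);

        printf("is_end(end) = %d, base recovered = %d\n",
               ptr_is_end(end), base_of(end) == (void *)slab);
        return 0;
}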
@@ -1078,14 +1073,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
        struct page *page;
        int pages = 1 << s->order;
 
-       if (s->order)
-               flags |= __GFP_COMP;
-
-       if (s->flags & SLAB_CACHE_DMA)
-               flags |= SLUB_DMA;
-
-       if (s->flags & SLAB_RECLAIM_ACCOUNT)
-               flags |= __GFP_RECLAIMABLE;
+       flags |= s->allocflags;
 
        if (node == -1)
                page = alloc_pages(flags, s->order);
@@ -1519,11 +1507,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 {
        void **object;
        struct page *new;
-#ifdef SLUB_FASTPATH
-       unsigned long flags;
 
-       local_irq_save(flags);
-#endif
        if (!c->page)
                goto new_slab;
 
@@ -1546,10 +1530,6 @@ load_freelist:
 unlock_out:
        slab_unlock(c->page);
        stat(c, ALLOC_SLOWPATH);
-out:
-#ifdef SLUB_FASTPATH
-       local_irq_restore(flags);
-#endif
        return object;
 
 another_slab:
@@ -1581,8 +1561,22 @@ new_slab:
                c->page = new;
                goto load_freelist;
        }
-       object = NULL;
-       goto out;
+
+       /*
+        * No memory available.
+        *
+        * If the slab uses higher order allocs but the object is
+        * smaller than a page size then we can fallback in emergencies
+        * to the page allocator via kmalloc_large. The page allocator may
+        * have failed to obtain a higher order page and we can try to
+        * allocate a single page if the object fits into a single page.
+        * That is only possible if certain conditions are met that are being
+        * checked when a slab is created.
+        */
+       if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
+               return kmalloc_large(s->objsize, gfpflags);
+
+       return NULL;
 debug:
        object = c->page->freelist;
        if (!alloc_debug_processing(s, c->page, object, addr))
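The comment added in the hunk above captures the new out-of-memory policy: when a kmalloc slab normally uses a higher-order page and that allocation fails, the request can still be satisfied from a single page as long as the object itself fits in one. The general "try the preferred order, then fall back to a minimal allocation" shape can be sketched in plain userspace C as below; the helper name, sizes, and malloc-based backing are illustrative assumptions, not the kernel API.

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

/*
 * Hypothetical helper: try to grab a higher-order "slab" of 2^order pages;
 * if that fails and the object fits into one page, fall back to a single
 * page, mirroring the __PAGE_ALLOC_FALLBACK idea in spirit only.
 */
static void *alloc_with_fallback(size_t object_size, unsigned int order)
{
        void *slab = malloc(PAGE_SIZE << order);   /* preferred: 2^order pages */

        if (slab)
                return slab;

        if (object_size <= PAGE_SIZE)              /* emergency: one page is enough */
                return malloc(PAGE_SIZE);

        return NULL;                               /* nothing more we can do */
}

int main(void)
{
        void *p = alloc_with_fallback(512, 3);

        printf("got %p\n", p);
        free(p);
        return 0;
}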
@@ -1609,34 +1603,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
        void **object;
        struct kmem_cache_cpu *c;
-
-       /*
-        * The SLUB_FASTPATH path is provisional and is currently disabled if the
-        * kernel is compiled with preemption or if the arch does not support
-        * fast cmpxchg operations. There are a couple of coming changes that will
-        * simplify matters and allow preemption. Ultimately we may end up making
-        * SLUB_FASTPATH the default.
-        *
-        * 1. The introduction of the per cpu allocator will avoid array lookups
-        *    through get_cpu_slab(). A special register can be used instead.
-        *
-        * 2. The introduction of per cpu atomic operations (cpu_ops) means that
-        *    we can realize the logic here entirely with per cpu atomics. The
-        *    per cpu atomic ops will take care of the preemption issues.
-        */
-
-#ifdef SLUB_FASTPATH
-       c = get_cpu_slab(s, raw_smp_processor_id());
-       do {
-               object = c->freelist;
-               if (unlikely(is_end(object) || !node_match(c, node))) {
-                       object = __slab_alloc(s, gfpflags, node, addr, c);
-                       break;
-               }
-               stat(c, ALLOC_FASTPATH);
-       } while (cmpxchg_local(&c->freelist, object, object[c->offset])
-                                                               != object);
-#else
        unsigned long flags;
 
        local_irq_save(flags);
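The block removed above was the provisional lockless allocation fastpath: pop the first object off the per-cpu freelist with cmpxchg_local and retry if the list changed underneath. The generic shape of such a compare-and-swap pop is sketched below with C11 atomics on a plain linked freelist; it is a teaching sketch under invented names, not a drop-in for the SLUB code, which additionally has to handle per-cpu state, node matching, and the ABA problem.

#include <stdatomic.h>
#include <stddef.h>

/* Each free object stores a pointer to the next free object at offset 0. */
struct object {
        struct object *next;
};

static _Atomic(struct object *) freelist;

/* Lockless pop: retry whenever another context changed the list head. */
static struct object *freelist_pop(void)
{
        struct object *head = atomic_load(&freelist);

        while (head) {
                /* On failure, head is reloaded with the current value. */
                if (atomic_compare_exchange_weak(&freelist, &head, head->next))
                        return head;    /* we now own the old head */
        }
        return NULL;                    /* empty: caller takes the slow path */
}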
@@ -1651,7 +1617,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
                stat(c, ALLOC_FASTPATH);
        }
        local_irq_restore(flags);
-#endif
 
        if (unlikely((gfpflags & __GFP_ZERO) && object))
                memset(object, 0, c->objsize);
@@ -1688,11 +1653,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
        void **object = (void *)x;
        struct kmem_cache_cpu *c;
 
-#ifdef SLUB_FASTPATH
-       unsigned long flags;
-
-       local_irq_save(flags);
-#endif
        c = get_cpu_slab(s, raw_smp_processor_id());
        stat(c, FREE_SLOWPATH);
        slab_lock(page);
@@ -1724,9 +1684,6 @@ checks_ok:
 
 out_unlock:
        slab_unlock(page);
-#ifdef SLUB_FASTPATH
-       local_irq_restore(flags);
-#endif
        return;
 
 slab_empty:
@@ -1739,9 +1696,6 @@ slab_empty:
        }
        slab_unlock(page);
        stat(c, FREE_SLAB);
-#ifdef SLUB_FASTPATH
-       local_irq_restore(flags);
-#endif
        discard_slab(s, page);
        return;
 
@@ -1767,34 +1721,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
        void **object = (void *)x;
        struct kmem_cache_cpu *c;
-
-#ifdef SLUB_FASTPATH
-       void **freelist;
-
-       c = get_cpu_slab(s, raw_smp_processor_id());
-       debug_check_no_locks_freed(object, s->objsize);
-       do {
-               freelist = c->freelist;
-               barrier();
-               /*
-                * If the compiler would reorder the retrieval of c->page to
-                * come before c->freelist then an interrupt could
-                * change the cpu slab before we retrieve c->freelist. We
-                * could be matching on a page no longer active and put the
-                * object onto the freelist of the wrong slab.
-                *
-                * On the other hand: If we already have the freelist pointer
-                * then any change of cpu_slab will cause the cmpxchg to fail
-                * since the freelist pointers are unique per slab.
-                */
-               if (unlikely(page != c->page || c->node < 0)) {
-                       __slab_free(s, page, x, addr, c->offset);
-                       break;
-               }
-               object[c->offset] = freelist;
-               stat(c, FREE_FASTPATH);
-       } while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
-#else
        unsigned long flags;
 
        local_irq_save(flags);
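The free-side counterpart removed above pushed the object back onto the per-cpu freelist with cmpxchg_local, and the deleted comment explains why c->freelist had to be read before c->page (hence the barrier()). The matching compare-and-swap push, again as a hedged userspace sketch with C11 atomics rather than the kernel primitives, looks roughly like this:

#include <stdatomic.h>
#include <stddef.h>

struct object {
        struct object *next;
};

static _Atomic(struct object *) freelist;

/* Lockless push: link the object in front of the current head and retry
 * until no one else has moved the head in the meantime. */
static void freelist_push(struct object *obj)
{
        struct object *head = atomic_load(&freelist);

        do {
                obj->next = head;   /* must point at the head we are replacing */
        } while (!atomic_compare_exchange_weak(&freelist, &head, obj));
}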
@@ -1808,7 +1734,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
                __slab_free(s, page, x, addr, c->offset);
 
        local_irq_restore(flags);
-#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2329,10 +2254,33 @@ static int calculate_sizes(struct kmem_cache *s)
        size = ALIGN(size, align);
        s->size = size;
 
-       s->order = calculate_order(size);
+       if ((flags & __KMALLOC_CACHE) &&
+                       PAGE_SIZE / size < slub_min_objects) {
+               /*
+                * Kmalloc cache that would not have enough objects in
+                * an order 0 page. Kmalloc slabs can fallback to
+                * page allocator order 0 allocs so take a reasonably large
+                * order that will allows us a good number of objects.
+                */
+               s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+               s->flags |= __PAGE_ALLOC_FALLBACK;
+               s->allocflags |= __GFP_NOWARN;
+       } else
+               s->order = calculate_order(size);
+
        if (s->order < 0)
                return 0;
 
+       s->allocflags = 0;
+       if (s->order)
+               s->allocflags |= __GFP_COMP;
+
+       if (s->flags & SLAB_CACHE_DMA)
+               s->allocflags |= SLUB_DMA;
+
+       if (s->flags & SLAB_RECLAIM_ACCOUNT)
+               s->allocflags |= __GFP_RECLAIMABLE;
+
        /*
         * Determine the number of objects per slab
         */
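The new calculate_sizes() code above folds the per-allocation GFP decisions (compound page, DMA, reclaimable) into a single precomputed s->allocflags, which allocate_slab() then simply ORs into the caller's flags. The pattern of translating cache properties into allocation flags once, at creation time, can be sketched in plain C as follows; the struct and flag values are invented stand-ins, not the kernel definitions.

#include <stdio.h>

/* Invented stand-ins for cache properties and allocation flags. */
#define CACHE_DMA          0x1u
#define CACHE_RECLAIMABLE  0x2u

#define GFP_COMP           0x10u
#define GFP_DMA            0x20u
#define GFP_RECLAIMABLE    0x40u

struct cache {
        unsigned int flags;      /* properties chosen at cache creation */
        unsigned int order;      /* page order used for one slab */
        unsigned int allocflags; /* precomputed, used on every slab allocation */
};

/* Done once when the cache is set up, like calculate_sizes(). */
static void cache_compute_allocflags(struct cache *c)
{
        c->allocflags = 0;
        if (c->order)
                c->allocflags |= GFP_COMP;
        if (c->flags & CACHE_DMA)
                c->allocflags |= GFP_DMA;
        if (c->flags & CACHE_RECLAIMABLE)
                c->allocflags |= GFP_RECLAIMABLE;
}

int main(void)
{
        struct cache c = { .flags = CACHE_RECLAIMABLE, .order = 2 };

        cache_compute_allocflags(&c);
        /* The hot path now needs only: flags |= c.allocflags; */
        printf("allocflags = %#x\n", c.allocflags);
        return 0;
}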
@@ -2484,11 +2432,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
  *    Kmalloc subsystem
  *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2536,7 +2484,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 
        down_write(&slub_lock);
        if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
-                       flags, NULL))
+                       flags | __KMALLOC_CACHE, NULL))
                goto panic;
 
        list_add(&s->list, &slab_caches);
@@ -2670,9 +2618,8 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
        struct kmem_cache *s;
 
-       if (unlikely(size > PAGE_SIZE / 2))
-               return (void *)__get_free_pages(flags | __GFP_COMP,
-                                               get_order(size));
+       if (unlikely(size > PAGE_SIZE))
+               return kmalloc_large(size, flags);
 
        s = get_slab(size, flags);
 
@@ -2688,9 +2635,8 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
        struct kmem_cache *s;
 
-       if (unlikely(size > PAGE_SIZE / 2))
-               return (void *)__get_free_pages(flags | __GFP_COMP,
-                                               get_order(size));
+       if (unlikely(size > PAGE_SIZE))
+               return kmalloc_large(size, flags);
 
        s = get_slab(size, flags);
 
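With the hunks above, __kmalloc() keeps slab-backed allocations up to a full PAGE_SIZE (served by the extra kmalloc_caches[PAGE_SHIFT] slot) and hands anything larger straight to the page allocator through kmalloc_large(). The routing can be illustrated with a small standalone sketch; size_to_index(), the constants, and the printout are assumptions for illustration, not SLUB's actual get_slab() lookup.

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define KMALLOC_SHIFT_LOW 3              /* smallest cache assumed: 8 bytes */

/* Map a request size to the index of the smallest power-of-two cache
 * that fits it, i.e. a cache at index i serves objects of 2^i bytes. */
static int size_to_index(size_t size)
{
        int i = KMALLOC_SHIFT_LOW;

        while ((1UL << i) < size)
                i++;
        return i;
}

int main(void)
{
        size_t sizes[] = { 8, 100, 4096, 5000 };

        for (size_t n = 0; n < sizeof(sizes) / sizeof(sizes[0]); n++) {
                size_t size = sizes[n];

                if (size > PAGE_SIZE)
                        printf("%zu bytes -> page allocator (kmalloc_large)\n", size);
                else
                        printf("%zu bytes -> kmalloc cache index %d (%lu-byte objects)\n",
                               size, size_to_index(size), 1UL << size_to_index(size));
        }
        return 0;
}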
@@ -3001,7 +2947,7 @@ void __init kmem_cache_init(void)
                caches++;
        }
 
-       for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+       for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
                create_kmalloc_cache(&kmalloc_caches[i],
                        "kmalloc", 1 << i, GFP_KERNEL);
                caches++;
@@ -3028,7 +2974,7 @@ void __init kmem_cache_init(void)
        slab_state = UP;
 
        /* Provide the correct kmalloc names now that the caches are up */
-       for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+       for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
                kmalloc_caches[i]. name =
                        kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3057,6 +3003,9 @@ static int slab_unmergeable(struct kmem_cache *s)
        if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
                return 1;
 
+       if ((s->flags & __PAGE_ALLOC_FALLBACK))
+               return 1;
+
        if (s->ctor)
                return 1;
 
@@ -3218,9 +3167,9 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
        struct kmem_cache *s;
 
-       if (unlikely(size > PAGE_SIZE / 2))
-               return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-                                               get_order(size));
+       if (unlikely(size > PAGE_SIZE))
+               return kmalloc_large(size, gfpflags);
+
        s = get_slab(size, gfpflags);
 
        if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3234,9 +3183,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
        struct kmem_cache *s;
 
-       if (unlikely(size > PAGE_SIZE / 2))
-               return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-                                               get_order(size));
+       if (unlikely(size > PAGE_SIZE))
+               return kmalloc_large(size, gfpflags);
+
        s = get_slab(size, gfpflags);
 
        if (unlikely(ZERO_OR_NULL_PTR(s)))