Diffstat (limited to 'mm/slub.c')
-rw-r--r--	mm/slub.c	175
1 file changed, 62 insertions, 113 deletions
@@ -149,13 +149,6 @@ static inline void ClearSlabDebug(struct page *page)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
-/*
- * Currently fastpath is not supported if preemption is enabled.
- */
-#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
-#define SLUB_FASTPATH
-#endif
-
 #if PAGE_SHIFT <= 12
 
 /*
@@ -211,6 +204,8 @@ static inline void ClearSlabDebug(struct page *page)
 /* Internal SLUB flags */
 #define __OBJECT_POISON		0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */
+#define __KMALLOC_CACHE		0x20000000 /* objects freed using kfree */
+#define __PAGE_ALLOC_FALLBACK	0x10000000 /* Allow fallback to page alloc */
 
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -308,7 +303,7 @@ static inline int is_end(void *addr)
 	return (unsigned long)addr & PAGE_MAPPING_ANON;
 }
 
-void *slab_address(struct page *page)
+static void *slab_address(struct page *page)
 {
 	return page->end - PAGE_MAPPING_ANON;
 }
@@ -1078,14 +1073,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct page *page;
 	int pages = 1 << s->order;
 
-	if (s->order)
-		flags |= __GFP_COMP;
-
-	if (s->flags & SLAB_CACHE_DMA)
-		flags |= SLUB_DMA;
-
-	if (s->flags & SLAB_RECLAIM_ACCOUNT)
-		flags |= __GFP_RECLAIMABLE;
+	flags |= s->allocflags;
 
 	if (node == -1)
 		page = alloc_pages(flags, s->order);
@@ -1519,11 +1507,7 @@ static void *__slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct page *new;
-#ifdef SLUB_FASTPATH
-	unsigned long flags;
 
-	local_irq_save(flags);
-#endif
 	if (!c->page)
 		goto new_slab;
 
@@ -1546,10 +1530,6 @@ load_freelist:
 unlock_out:
 	slab_unlock(c->page);
 	stat(c, ALLOC_SLOWPATH);
-out:
-#ifdef SLUB_FASTPATH
-	local_irq_restore(flags);
-#endif
 	return object;
 
 another_slab:
@@ -1581,8 +1561,22 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
-	object = NULL;
-	goto out;
+
+	/*
+	 * No memory available.
+	 *
+	 * If the slab uses higher order allocs but the object is
+	 * smaller than a page size then we can fallback in emergencies
+	 * to the page allocator via kmalloc_large. The page allocator may
+	 * have failed to obtain a higher order page and we can try to
+	 * allocate a single page if the object fits into a single page.
+	 * That is only possible if certain conditions are met that are being
+	 * checked when a slab is created.
+	 */
+	if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
+		return kmalloc_large(s->objsize, gfpflags);
+
+	return NULL;
 debug:
 	object = c->page->freelist;
 	if (!alloc_debug_processing(s, c->page, object, addr))
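For reference, kmalloc_large() is not defined in this hunk; at the time of this patch it was a small inline helper (in include/linux/slub_def.h) that simply hands the request to the page allocator as a compound allocation. A rough reconstruction for illustration only, not taken from this diff:

static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	/* Bypass SLUB entirely: take pages straight from the page allocator */
	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
}

The fallback branch above only exists for kmalloc caches whose objects fit into a single page, which is what the "certain conditions ... checked when a slab is created" remark refers to.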
@@ -1609,34 +1603,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct kmem_cache_cpu *c;
-
-/*
- * The SLUB_FASTPATH path is provisional and is currently disabled if the
- * kernel is compiled with preemption or if the arch does not support
- * fast cmpxchg operations. There are a couple of coming changes that will
- * simplify matters and allow preemption. Ultimately we may end up making
- * SLUB_FASTPATH the default.
- *
- * 1. The introduction of the per cpu allocator will avoid array lookups
- *    through get_cpu_slab(). A special register can be used instead.
- *
- * 2. The introduction of per cpu atomic operations (cpu_ops) means that
- *    we can realize the logic here entirely with per cpu atomics. The
- *    per cpu atomic ops will take care of the preemption issues.
- */
-
-#ifdef SLUB_FASTPATH
-	c = get_cpu_slab(s, raw_smp_processor_id());
-	do {
-		object = c->freelist;
-		if (unlikely(is_end(object) || !node_match(c, node))) {
-			object = __slab_alloc(s, gfpflags, node, addr, c);
-			break;
-		}
-		stat(c, ALLOC_FASTPATH);
-	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
-								!= object);
-#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1651,7 +1617,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
-#endif
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
@@ -1688,11 +1653,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
-#ifdef SLUB_FASTPATH
-	unsigned long flags;
-
-	local_irq_save(flags);
-#endif
 	c = get_cpu_slab(s, raw_smp_processor_id());
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
@@ -1724,9 +1684,6 @@ checks_ok:
 
 out_unlock:
 	slab_unlock(page);
-#ifdef SLUB_FASTPATH
-	local_irq_restore(flags);
-#endif
 	return;
 
 slab_empty:
@@ -1739,9 +1696,6 @@ slab_empty:
 	}
 	slab_unlock(page);
 	stat(c, FREE_SLAB);
-#ifdef SLUB_FASTPATH
-	local_irq_restore(flags);
-#endif
 	discard_slab(s, page);
 	return;
 
@@ -1767,34 +1721,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
 {
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
-
-#ifdef SLUB_FASTPATH
-	void **freelist;
-
-	c = get_cpu_slab(s, raw_smp_processor_id());
-	debug_check_no_locks_freed(object, s->objsize);
-	do {
-		freelist = c->freelist;
-		barrier();
-		/*
-		 * If the compiler would reorder the retrieval of c->page to
-		 * come before c->freelist then an interrupt could
-		 * change the cpu slab before we retrieve c->freelist. We
-		 * could be matching on a page no longer active and put the
-		 * object onto the freelist of the wrong slab.
-		 *
-		 * On the other hand: If we already have the freelist pointer
-		 * then any change of cpu_slab will cause the cmpxchg to fail
-		 * since the freelist pointers are unique per slab.
-		 */
-		if (unlikely(page != c->page || c->node < 0)) {
-			__slab_free(s, page, x, addr, c->offset);
-			break;
-		}
-		object[c->offset] = freelist;
-		stat(c, FREE_FASTPATH);
-	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
-#else
 	unsigned long flags;
 
 	local_irq_save(flags);
@@ -1808,7 +1734,6 @@ static __always_inline void slab_free(struct kmem_cache *s,
 		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
-#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -2329,10 +2254,33 @@ static int calculate_sizes(struct kmem_cache *s)
 	size = ALIGN(size, align);
 	s->size = size;
 
-	s->order = calculate_order(size);
+	if ((flags & __KMALLOC_CACHE) &&
+			PAGE_SIZE / size < slub_min_objects) {
+		/*
+		 * Kmalloc cache that would not have enough objects in
+		 * an order 0 page. Kmalloc slabs can fallback to
+		 * page allocator order 0 allocs so take a reasonably large
+		 * order that will allows us a good number of objects.
+		 */
+		s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+		s->flags |= __PAGE_ALLOC_FALLBACK;
+		s->allocflags |= __GFP_NOWARN;
+	} else
+		s->order = calculate_order(size);
+
 	if (s->order < 0)
 		return 0;
 
+	s->allocflags = 0;
+	if (s->order)
+		s->allocflags |= __GFP_COMP;
+
+	if (s->flags & SLAB_CACHE_DMA)
+		s->allocflags |= SLUB_DMA;
+
+	if (s->flags & SLAB_RECLAIM_ACCOUNT)
+		s->allocflags |= __GFP_RECLAIMABLE;
+
 	/*
 	 * Determine the number of objects per slab
 	 */
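To make the new order decision concrete, the sketch below replays it in userspace with example numbers. PAGE_SIZE, slub_min_objects and slub_max_order here are illustrative assumptions for a 4 KiB-page configuration, not values taken from this patch:

#include <stdio.h>

#define PAGE_SIZE		4096	/* assumed 4 KiB pages */
#define PAGE_ALLOC_COSTLY_ORDER	3

static int max_int(int a, int b) { return a > b ? a : b; }

int main(void)
{
	int slub_min_objects = 4;	/* example tuning value */
	int slub_max_order = 1;		/* example tuning value */
	int size = 4096;		/* object size of a kmalloc-4096 cache */

	if (PAGE_SIZE / size < slub_min_objects) {
		/* 4096 / 4096 = 1 < 4: take a large order, allow fallback */
		printf("order %d, __PAGE_ALLOC_FALLBACK set\n",
		       max_int(slub_max_order, PAGE_ALLOC_COSTLY_ORDER));
	} else {
		printf("normal calculate_order() path\n");
	}
	return 0;
}

With these numbers the kmalloc-4096 cache gets order 3 slabs, but because __PAGE_ALLOC_FALLBACK is set it can still fall back to a single order-0 page when higher-order pages are unavailable.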
@@ -2484,11 +2432,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 *		Kmalloc subsystem
 *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2536,7 +2484,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 
 	down_write(&slub_lock);
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
-			flags, NULL))
+			flags | __KMALLOC_CACHE, NULL))
 		goto panic;
 
 	list_add(&s->list, &slab_caches);
@@ -2670,9 +2618,8 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+	if (unlikely(size > PAGE_SIZE))
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
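The caller-visible effect of moving the cutoff from PAGE_SIZE / 2 to PAGE_SIZE: on a 4 KiB-page system a 4096-byte request is now served from the kmalloc-4096 slab cache, while anything larger still goes straight to the page allocator. A minimal kernel-context sketch, assuming 4 KiB pages; the helper name is made up for illustration:

#include <linux/slab.h>
#include <linux/mm.h>

static void kmalloc_cutoff_demo(void)	/* hypothetical helper */
{
	/* <= PAGE_SIZE: served by a slab cache (kmalloc-4096 on 4 KiB pages) */
	void *one_page = kmalloc(PAGE_SIZE, GFP_KERNEL);

	/* > PAGE_SIZE: still handed to the page allocator via kmalloc_large() */
	void *two_pages = kmalloc(2 * PAGE_SIZE, GFP_KERNEL);

	kfree(one_page);
	kfree(two_pages);
}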
@@ -2688,9 +2635,8 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+	if (unlikely(size > PAGE_SIZE))
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -3001,7 +2947,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3028,7 +2974,7 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3057,6 +3003,9 @@ static int slab_unmergeable(struct kmem_cache *s)
 	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
 		return 1;
 
+	if ((s->flags & __PAGE_ALLOC_FALLBACK))
+		return 1;
+
 	if (s->ctor)
 		return 1;
 
@@ -3218,9 +3167,9 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+	if (unlikely(size > PAGE_SIZE))
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3234,9 +3183,9 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+	if (unlikely(size > PAGE_SIZE))
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
