Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c | 434
1 file changed, 238 insertions(+), 196 deletions(-)
@@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
-static void enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep);
 static void cache_reap(void *unused);
 
 /*
@@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = {
 #endif
 };
 
+#define BAD_ALIEN_MAGIC 0x01020304ul
+
 #ifdef CONFIG_LOCKDEP
 
 /*
@@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = {
  * The locking for this is tricky in that it nests within the locks
  * of all other slabs in a few places; to deal with this special
  * locking we put on-slab caches into a separate lock-class.
+ *
+ * We set lock class for alien array caches which are up during init.
+ * The lock annotation will be lost if all cpus of a node goes down and
+ * then comes back up during hotplug
  */
-static struct lock_class_key on_slab_key;
+static struct lock_class_key on_slab_l3_key;
+static struct lock_class_key on_slab_alc_key;
+
+static inline void init_lock_keys(void)
 
-static inline void init_lock_keys(struct cache_sizes *s)
 {
 	int q;
-
-	for (q = 0; q < MAX_NUMNODES; q++) {
-		if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep))
-			continue;
-		lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock,
-				  &on_slab_key);
+	struct cache_sizes *s = malloc_sizes;
+
+	while (s->cs_size != ULONG_MAX) {
+		for_each_node(q) {
+			struct array_cache **alc;
+			int r;
+			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
+			if (!l3 || OFF_SLAB(s->cs_cachep))
+				continue;
+			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+			alc = l3->alien;
+			/*
+			 * FIXME: This check for BAD_ALIEN_MAGIC
+			 * should go away when common slab code is taught to
+			 * work even without alien caches.
+			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+			 * for alloc_alien_cache,
+			 */
+			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+				continue;
+			for_each_node(r) {
+				if (alc[r])
+					lockdep_set_class(&alc[r]->lock,
+						&on_slab_alc_key);
+			}
+		}
+		s++;
 	}
 }
-
 #else
-static inline void init_lock_keys(struct cache_sizes *s)
+static inline void init_lock_keys(void)
 {
 }
 #endif
 
-
-
 /* Guard access to the cache-chain. */
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
 /*
- * vm_enough_memory() looks at this to determine how many slab-allocated pages
- * are possibly freeable under pressure
- *
- * SLAB_RECLAIM_ACCOUNT turns this on per-slab
- */
-atomic_t slab_reclaim_pages;
-
-/*
  * chicken and egg problem: delay the per-cpu array allocation
  * until the general caches are up.
  */
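Note on the hunk above: the new init_lock_keys() uses lockdep_set_class() to give the kmem_list3 list_lock and each alien array_cache lock its own class, so the allocator's nested locking is not reported as recursion. A minimal illustration of that lockdep idiom (not taken from this patch; the locks and key names below are made up):

	#include <linux/spinlock.h>
	#include <linux/lockdep.h>

	/* Hypothetical example: two locks of the same type that nest. */
	static struct lock_class_key parent_class, child_class;

	static void example_init_locks(spinlock_t *parent, spinlock_t *child)
	{
		spin_lock_init(parent);
		spin_lock_init(child);
		/* Distinct classes keep lockdep from flagging the nesting as recursive. */
		lockdep_set_class(parent, &parent_class);
		lockdep_set_class(child, &child_class);
	}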
@@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size,
 	return csizep->cs_cachep;
 }
 
-struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
 {
 	return __find_general_cachep(size, gfpflags);
 }
-EXPORT_SYMBOL(kmem_find_general_cachep);
 
 static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 {
@@ -955,7 +972,39 @@ static int transfer_objects(struct array_cache *to,
 	return nr;
 }
 
-#ifdef CONFIG_NUMA
+#ifndef CONFIG_NUMA
+
+#define drain_alien_cache(cachep, alien) do { } while (0)
+#define reap_alien(cachep, l3) do { } while (0)
+
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+	return (struct array_cache **)BAD_ALIEN_MAGIC;
+}
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
+
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+	return 0;
+}
+
+static inline void *alternate_node_alloc(struct kmem_cache *cachep,
+		gfp_t flags)
+{
+	return NULL;
+}
+
+static inline void *__cache_alloc_node(struct kmem_cache *cachep,
+		gfp_t flags, int nodeid)
+{
+	return NULL;
+}
+
+#else	/* CONFIG_NUMA */
+
 static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
@@ -1084,26 +1133,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 	}
 	return 1;
 }
-
-#else
-
-#define drain_alien_cache(cachep, alien) do { } while (0)
-#define reap_alien(cachep, l3) do { } while (0)
-
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
-{
-	return (struct array_cache **) 0x01020304ul;
-}
-
-static inline void free_alien_cache(struct array_cache **ac_ptr)
-{
-}
-
-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
-{
-	return 0;
-}
-
 #endif
 
 static int __cpuinit cpuup_callback(struct notifier_block *nfb,
@@ -1422,7 +1451,6 @@ void __init kmem_cache_init(void)
 					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
 					NULL, NULL);
 	}
-	init_lock_keys(sizes);
 
 	sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 					sizes->cs_size,
@@ -1491,10 +1519,15 @@ void __init kmem_cache_init(void)
 		struct kmem_cache *cachep;
 		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next)
-			enable_cpucache(cachep);
+			if (enable_cpucache(cachep))
+				BUG();
 		mutex_unlock(&cache_chain_mutex);
 	}
 
+	/* Annotate slab for lockdep -- annotate the malloc caches */
+	init_lock_keys();
+
+
 	/* Done! */
 	g_cpucache_up = FULL;
 
@@ -1543,7 +1576,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	flags |= __GFP_COMP;
 #endif
-	flags |= cachep->gfpflags;
+
+	/*
+	 * Under NUMA we want memory on the indicated node. We will handle
+	 * the needed fallback ourselves since we want to serve from our
+	 * per node object lists first for other nodes.
+	 */
+	flags |= cachep->gfpflags | GFP_THISNODE;
 
 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
@@ -1551,8 +1590,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 
 	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_add(nr_pages, &slab_reclaim_pages);
-	add_zone_page_state(page_zone(page), NR_SLAB, nr_pages);
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_RECLAIMABLE, nr_pages);
+	else
+		add_zone_page_state(page_zone(page),
+			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
 	return page_address(page);
@@ -1567,7 +1609,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
-	sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed);
+	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_RECLAIMABLE, nr_freed);
+	else
+		sub_zone_page_state(page_zone(page),
+				NR_SLAB_UNRECLAIMABLE, nr_freed);
 	while (i--) {
 		BUG_ON(!PageSlab(page));
 		__ClearPageSlab(page);
@@ -1576,8 +1623,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_pages((unsigned long)addr, cachep->gfporder);
-	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages);
 }
 
 static void kmem_rcu_free(struct rcu_head *head)
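Together with the removal of atomic_t slab_reclaim_pages earlier in this diff, the two hunks above move slab page accounting into the per-zone vm statistics, split into NR_SLAB_RECLAIMABLE and NR_SLAB_UNRECLAIMABLE. A sketch of how a consumer of the old counter would read the new one (an assumption about callers such as the overcommit code, not shown in this file):

	#include <linux/vmstat.h>

	/* Illustrative only: system-wide count of reclaimable slab pages. */
	static unsigned long reclaimable_slab_pages(void)
	{
		return global_page_state(NR_SLAB_RECLAIMABLE);
	}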
@@ -1834,6 +1879,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
 	}
 }
 
+static void __kmem_cache_destroy(struct kmem_cache *cachep)
+{
+	int i;
+	struct kmem_list3 *l3;
+
+	for_each_online_cpu(i)
+		kfree(cachep->array[i]);
+
+	/* NUMA: free the list3 structures */
+	for_each_online_node(i) {
+		l3 = cachep->nodelists[i];
+		if (l3) {
+			kfree(l3->shared);
+			free_alien_cache(l3->alien);
+			kfree(l3);
+		}
+	}
+	kmem_cache_free(&cache_cache, cachep);
+}
+
+
 /**
  * calculate_slab_order - calculate size (page order) of slabs
  * @cachep: pointer to the cache that is being created
@@ -1904,12 +1970,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
-static void setup_cpu_cache(struct kmem_cache *cachep)
+static int setup_cpu_cache(struct kmem_cache *cachep)
 {
-	if (g_cpucache_up == FULL) {
-		enable_cpucache(cachep);
-		return;
-	}
+	if (g_cpucache_up == FULL)
+		return enable_cpucache(cachep);
+
 	if (g_cpucache_up == NONE) {
 		/*
 		 * Note: the first kmem_cache_create must create the cache
@@ -1956,6 +2021,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
 	cpu_cache_get(cachep)->touched = 0;
 	cachep->batchcount = 1;
 	cachep->limit = BOOT_CPUCACHE_ENTRIES;
+	return 0;
 }
 
 /**
@@ -2097,6 +2163,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	} else {
 		ralign = BYTES_PER_WORD;
 	}
+
+	/*
+	 * Redzoning and user store require word alignment. Note this will be
+	 * overridden by architecture or caller mandated alignment if either
+	 * is greater than BYTES_PER_WORD.
+	 */
+	if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER)
+		ralign = BYTES_PER_WORD;
+
 	/* 2) arch mandated alignment: disables debug if necessary */
 	if (ralign < ARCH_SLAB_MINALIGN) {
 		ralign = ARCH_SLAB_MINALIGN;
@@ -2110,8 +2185,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
 	/*
-	 * 4) Store it. Note that the debug code below can reduce
-	 * the alignment to BYTES_PER_WORD.
+	 * 4) Store it.
 	 */
 	align = ralign;
 
@@ -2123,20 +2197,19 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #if DEBUG
 	cachep->obj_size = size;
 
+	/*
+	 * Both debugging options require word-alignment which is calculated
+	 * into align above.
+	 */
 	if (flags & SLAB_RED_ZONE) {
-		/* redzoning only works with word aligned caches */
-		align = BYTES_PER_WORD;
-
 		/* add space for red zone words */
 		cachep->obj_offset += BYTES_PER_WORD;
 		size += 2 * BYTES_PER_WORD;
 	}
 	if (flags & SLAB_STORE_USER) {
-		/* user store requires word alignment and
-		 * one word storage behind the end of the real
-		 * object.
+		/* user store requires one word storage behind the end of
+		 * the real object.
 		 */
-		align = BYTES_PER_WORD;
 		size += BYTES_PER_WORD;
 	}
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
@@ -2200,14 +2273,26 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		cachep->gfpflags |= GFP_DMA;
 	cachep->buffer_size = size;
 
-	if (flags & CFLGS_OFF_SLAB)
+	if (flags & CFLGS_OFF_SLAB) {
 		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+		/*
+		 * This is a possibility for one of the malloc_sizes caches.
+		 * But since we go off slab only for object size greater than
+		 * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
+		 * this should not happen at all.
+		 * But leave a BUG_ON for some lucky dude.
+		 */
+		BUG_ON(!cachep->slabp_cache);
+	}
 	cachep->ctor = ctor;
 	cachep->dtor = dtor;
 	cachep->name = name;
 
-
-	setup_cpu_cache(cachep);
+	if (setup_cpu_cache(cachep)) {
+		__kmem_cache_destroy(cachep);
+		cachep = NULL;
+		goto oops;
+	}
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
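With setup_cpu_cache() now returning an error and kmem_cache_create() tearing the half-built cache down via __kmem_cache_destroy() on failure, a cache created without SLAB_PANIC can come back as NULL for one more reason, so callers keep the usual NULL check. Illustrative caller only (the struct and names are hypothetical, not part of this patch):

	struct foo {
		int bar;
	};

	static struct kmem_cache *foo_cachep;

	static int __init foo_init(void)
	{
		foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
					       0, 0, NULL, NULL);
		if (!foo_cachep)	/* also covers a failed setup_cpu_cache() */
			return -ENOMEM;
		return 0;
	}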
@@ -2375,7 +2460,6 @@ EXPORT_SYMBOL(kmem_cache_shrink);
  * @cachep: the cache to destroy
  *
  * Remove a struct kmem_cache object from the slab cache.
- * Returns 0 on success.
  *
  * It is expected this function will be called by a module when it is
  * unloaded. This will remove the cache completely, and avoid a duplicate
@@ -2387,11 +2471,8 @@ EXPORT_SYMBOL(kmem_cache_shrink);
 * The caller must guarantee that noone will allocate memory from the cache
 * during the kmem_cache_destroy().
 */
-int kmem_cache_destroy(struct kmem_cache *cachep)
+void kmem_cache_destroy(struct kmem_cache *cachep)
 {
-	int i;
-	struct kmem_list3 *l3;
-
 	BUG_ON(!cachep || in_interrupt());
 
 	/* Don't let CPUs to come and go */
@@ -2411,31 +2492,28 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 		list_add(&cachep->next, &cache_chain);
 		mutex_unlock(&cache_chain_mutex);
 		unlock_cpu_hotplug();
-		return 1;
+		return;
 	}
 
 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
 		synchronize_rcu();
 
-	for_each_online_cpu(i)
-		kfree(cachep->array[i]);
-
-	/* NUMA: free the list3 structures */
-	for_each_online_node(i) {
-		l3 = cachep->nodelists[i];
-		if (l3) {
-			kfree(l3->shared);
-			free_alien_cache(l3->alien);
-			kfree(l3);
-		}
-	}
-	kmem_cache_free(&cache_cache, cachep);
+	__kmem_cache_destroy(cachep);
 	unlock_cpu_hotplug();
-	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
-/* Get the memory for a slab management obj. */
+/*
+ * Get the memory for a slab management obj.
+ * For a slab cache when the slab descriptor is off-slab, slab descriptors
+ * always come from malloc_sizes caches. The slab descriptor cannot
+ * come from the same cache which is getting created because,
+ * when we are searching for an appropriate cache for these
+ * descriptors in kmem_cache_create, we search through the malloc_sizes array.
+ * If we are creating a malloc_sizes cache here it would not be visible to
+ * kmem_find_general_cachep till the initialization is complete.
+ * Hence we cannot have slabp_cache same as the original cache.
+ */
 static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 				   int colour_off, gfp_t local_flags,
 				   int nodeid)
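kmem_cache_destroy() no longer returns a status; as the hunk shows, a cache that still holds objects is simply re-linked into the cache chain. Callers that used to check the return value just drop the check. Continuing the hypothetical example from above:

	static void __exit foo_exit(void)
	{
		/* No return value to check any more; the cache must be unused here. */
		kmem_cache_destroy(foo_cachep);
	}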
@@ -2968,14 +3046,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	void *objp;
 	struct array_cache *ac;
 
-#ifdef CONFIG_NUMA
-	if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
-		objp = alternate_node_alloc(cachep, flags);
-		if (objp != NULL)
-			return objp;
-	}
-#endif
-
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
 	if (likely(ac->avail)) {
@@ -2993,12 +3063,24 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
 	gfp_t flags, void *caller)
 {
 	unsigned long save_flags;
-	void *objp;
+	void *objp = NULL;
 
 	cache_alloc_debugcheck_before(cachep, flags);
 
 	local_irq_save(save_flags);
-	objp = ____cache_alloc(cachep, flags);
+
+	if (unlikely(NUMA_BUILD &&
+			current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
+		objp = alternate_node_alloc(cachep, flags);
+
+	if (!objp)
+		objp = ____cache_alloc(cachep, flags);
+	/*
+	 * We may just have run out of memory on the local node.
+	 * __cache_alloc_node() knows how to locate memory on other nodes
+	 */
+	if (NUMA_BUILD && !objp)
+		objp = __cache_alloc_node(cachep, flags, numa_node_id());
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp,
 					    caller);
@@ -3017,7 +3099,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
 	int nid_alloc, nid_here;
 
-	if (in_interrupt())
+	if (in_interrupt() || (flags & __GFP_THISNODE))
 		return NULL;
 	nid_alloc = nid_here = numa_node_id();
 	if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
@@ -3030,6 +3112,28 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
 }
 
 /*
+ * Fallback function if there was no memory available and no objects on a
+ * certain node and we are allowed to fall back. We mimick the behavior of
+ * the page allocator. We fall back according to a zonelist determined by
+ * the policy layer while obeying cpuset constraints.
+ */
+void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
+{
+	struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy))
+					->node_zonelists[gfp_zone(flags)];
+	struct zone **z;
+	void *obj = NULL;
+
+	for (z = zonelist->zones; *z && !obj; z++)
+		if (zone_idx(*z) <= ZONE_NORMAL &&
+				cpuset_zone_allowed(*z, flags))
+			obj = __cache_alloc_node(cache,
+					flags | __GFP_THISNODE,
+					zone_to_nid(*z));
+	return obj;
+}
+
+/*
  * A interface to enable slab creation on nodeid
  */
 static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
@@ -3082,11 +3186,15 @@ retry:
 must_grow:
 	spin_unlock(&l3->list_lock);
 	x = cache_grow(cachep, flags, nodeid);
+	if (x)
+		goto retry;
 
-	if (!x)
-		return NULL;
+	if (!(flags & __GFP_THISNODE))
+		/* Unable to grow the cache. Fall back to other nodes. */
+		return fallback_alloc(cachep, flags);
+
+	return NULL;
 
-	goto retry;
 done:
 	return obj;
 }
@@ -3119,6 +3227,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
 		if (slabp->inuse == 0) {
 			if (l3->free_objects > l3->free_limit) {
 				l3->free_objects -= cachep->num;
+				/* No need to drop any previously held
+				 * lock here, even if we have a off-slab slab
+				 * descriptor it is guaranteed to come from
+				 * a different cache, refer to comments before
+				 * alloc_slabmgmt.
+				 */
 				slab_destroy(cachep, slabp);
 			} else {
 				list_add(&slabp->list, &l3->slabs_free);
@@ -3317,7 +3431,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-void *kmalloc_node(size_t size, gfp_t flags, int node)
+void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *cachep;
 
@@ -3326,7 +3440,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node)
 		return NULL;
 	return kmem_cache_alloc_node(cachep, flags, node);
 }
-EXPORT_SYMBOL(kmalloc_node);
+EXPORT_SYMBOL(__kmalloc_node);
 #endif
 
 /**
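The out-of-line NUMA entry point is renamed from kmalloc_node() to __kmalloc_node() and exported under the new name; presumably kmalloc_node() itself becomes an inline wrapper in the slab header, a change outside this file. A sketch of such a wrapper, under that assumption (the name below is made up to avoid clashing with the real one):

	/* Hypothetical wrapper; the real definition would live in include/linux/slab.h. */
	static inline void *kmalloc_node_wrapper(size_t size, gfp_t flags, int node)
	{
		return __kmalloc_node(size, flags, node);
	}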
@@ -3370,55 +3484,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller)
 EXPORT_SYMBOL(__kmalloc_track_caller);
 #endif
 
-#ifdef CONFIG_SMP
-/**
- * __alloc_percpu - allocate one copy of the object for every present
- * cpu in the system, zeroing them.
- * Objects should be dereferenced using the per_cpu_ptr macro only.
- *
- * @size: how many bytes of memory are required.
- */
-void *__alloc_percpu(size_t size)
-{
-	int i;
-	struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL);
-
-	if (!pdata)
-		return NULL;
-
-	/*
-	 * Cannot use for_each_online_cpu since a cpu may come online
-	 * and we have no way of figuring out how to fix the array
-	 * that we have allocated then....
-	 */
-	for_each_possible_cpu(i) {
-		int node = cpu_to_node(i);
-
-		if (node_online(node))
-			pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node);
-		else
-			pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
-
-		if (!pdata->ptrs[i])
-			goto unwind_oom;
-		memset(pdata->ptrs[i], 0, size);
-	}
-
-	/* Catch derefs w/o wrappers */
-	return (void *)(~(unsigned long)pdata);
-
-unwind_oom:
-	while (--i >= 0) {
-		if (!cpu_possible(i))
-			continue;
-		kfree(pdata->ptrs[i]);
-	}
-	kfree(pdata);
-	return NULL;
-}
-EXPORT_SYMBOL(__alloc_percpu);
-#endif
-
 /**
  * kmem_cache_free - Deallocate an object
  * @cachep: The cache the allocation was from.
@@ -3464,29 +3529,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-#ifdef CONFIG_SMP
-/**
- * free_percpu - free previously allocated percpu memory
- * @objp: pointer returned by alloc_percpu.
- *
- * Don't free memory not originally allocated by alloc_percpu()
- * The complemented objp is to check for that.
- */
-void free_percpu(const void *objp)
-{
-	int i;
-	struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp);
-
-	/*
-	 * We allocate for all cpus so we cannot use for online cpu here.
-	 */
-	for_each_possible_cpu(i)
-		kfree(p->ptrs[i]);
-	kfree(p);
-}
-EXPORT_SYMBOL(free_percpu);
-#endif
-
 unsigned int kmem_cache_size(struct kmem_cache *cachep)
 {
 	return obj_size(cachep);
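__alloc_percpu() and free_percpu() disappear from mm/slab.c (presumably moving to a dedicated per-cpu allocation file elsewhere in the series); the caller-visible API does not change. Typical usage for reference, with a hypothetical counter structure:

	#include <linux/percpu.h>

	struct foo_counters {
		unsigned long events;
	};

	static struct foo_counters *foo_stats;

	static int foo_stats_init(void)
	{
		int cpu;

		foo_stats = alloc_percpu(struct foo_counters);
		if (!foo_stats)
			return -ENOMEM;

		/* Touch this CPU's copy with preemption disabled. */
		cpu = get_cpu();
		per_cpu_ptr(foo_stats, cpu)->events++;
		put_cpu();

		free_percpu(foo_stats);
		return 0;
	}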
@@ -3603,22 +3645,26 @@ static void do_ccupdate_local(void *info)
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared)
 {
-	struct ccupdate_struct new;
-	int i, err;
+	struct ccupdate_struct *new;
+	int i;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
 
-	memset(&new.new, 0, sizeof(new.new));
 	for_each_online_cpu(i) {
-		new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
+		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
 						batchcount);
-		if (!new.new[i]) {
+		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
-				kfree(new.new[i]);
+				kfree(new->new[i]);
+			kfree(new);
 			return -ENOMEM;
 		}
 	}
-	new.cachep = cachep;
+	new->cachep = cachep;
 
-	on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
+	on_each_cpu(do_ccupdate_local, (void *)new, 1, 1);
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
@@ -3626,7 +3672,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 	cachep->shared = shared;
 
 	for_each_online_cpu(i) {
-		struct array_cache *ccold = new.new[i];
+		struct array_cache *ccold = new->new[i];
 		if (!ccold)
 			continue;
 		spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
@@ -3634,18 +3680,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock);
 		kfree(ccold);
 	}
-
-	err = alloc_kmemlist(cachep);
-	if (err) {
-		printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n",
-		       cachep->name, -err);
-		BUG();
-	}
-	return 0;
+	kfree(new);
+	return alloc_kmemlist(cachep);
 }
 
 /* Called with cache_chain_mutex held always */
-static void enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep)
 {
 	int err;
 	int limit, shared;
@@ -3697,6 +3737,7 @@ static void enable_cpucache(struct kmem_cache *cachep)
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
+	return err;
 }
 
 /*
@@ -4157,6 +4198,7 @@ static int leaks_show(struct seq_file *m, void *p)
 		show_symbol(m, n[2*i+2]);
 		seq_putc(m, '\n');
 	}
+
 	return 0;
 }
 