Diffstat (limited to 'mm/slab.c')
| -rw-r--r-- | mm/slab.c | 434 |
1 file changed, 238 insertions, 196 deletions
| @@ -313,7 +313,7 @@ static int drain_freelist(struct kmem_cache *cache, | |||
| 313 | struct kmem_list3 *l3, int tofree); | 313 | struct kmem_list3 *l3, int tofree); |
| 314 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, | 314 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
| 315 | int node); | 315 | int node); |
| 316 | static void enable_cpucache(struct kmem_cache *cachep); | 316 | static int enable_cpucache(struct kmem_cache *cachep); |
| 317 | static void cache_reap(void *unused); | 317 | static void cache_reap(void *unused); |
| 318 | 318 | ||
| 319 | /* | 319 | /* |
| @@ -674,6 +674,8 @@ static struct kmem_cache cache_cache = { | |||
| 674 | #endif | 674 | #endif |
| 675 | }; | 675 | }; |
| 676 | 676 | ||
| 677 | #define BAD_ALIEN_MAGIC 0x01020304ul | ||
| 678 | |||
| 677 | #ifdef CONFIG_LOCKDEP | 679 | #ifdef CONFIG_LOCKDEP |
| 678 | 680 | ||
| 679 | /* | 681 | /* |
| @@ -682,42 +684,58 @@ static struct kmem_cache cache_cache = { | |||
| 682 | * The locking for this is tricky in that it nests within the locks | 684 | * The locking for this is tricky in that it nests within the locks |
| 683 | * of all other slabs in a few places; to deal with this special | 685 | * of all other slabs in a few places; to deal with this special |
| 684 | * locking we put on-slab caches into a separate lock-class. | 686 | * locking we put on-slab caches into a separate lock-class. |
| 687 | * | ||
| 688 | * We set the lock class for alien array caches that are up during init. | ||
| 689 | * The lock annotation will be lost if all cpus of a node go down and | ||
| 690 | * then come back up during hotplug. | ||
| 685 | */ | 691 | */ |
| 686 | static struct lock_class_key on_slab_key; | 692 | static struct lock_class_key on_slab_l3_key; |
| 693 | static struct lock_class_key on_slab_alc_key; | ||
| 694 | |||
| 695 | static inline void init_lock_keys(void) | ||
| 687 | 696 | ||
| 688 | static inline void init_lock_keys(struct cache_sizes *s) | ||
| 689 | { | 697 | { |
| 690 | int q; | 698 | int q; |
| 691 | 699 | struct cache_sizes *s = malloc_sizes; | |
| 692 | for (q = 0; q < MAX_NUMNODES; q++) { | 700 | |
| 693 | if (!s->cs_cachep->nodelists[q] || OFF_SLAB(s->cs_cachep)) | 701 | while (s->cs_size != ULONG_MAX) { |
| 694 | continue; | 702 | for_each_node(q) { |
| 695 | lockdep_set_class(&s->cs_cachep->nodelists[q]->list_lock, | 703 | struct array_cache **alc; |
| 696 | &on_slab_key); | 704 | int r; |
| 705 | struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; | ||
| 706 | if (!l3 || OFF_SLAB(s->cs_cachep)) | ||
| 707 | continue; | ||
| 708 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | ||
| 709 | alc = l3->alien; | ||
| 710 | /* | ||
| 711 | * FIXME: This check for BAD_ALIEN_MAGIC | ||
| 712 | * should go away when common slab code is taught to | ||
| 713 | * work even without alien caches. | ||
| 714 | * Currently, non-NUMA code returns BAD_ALIEN_MAGIC | ||
| 715 | * for alloc_alien_cache. | ||
| 716 | */ | ||
| 717 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | ||
| 718 | continue; | ||
| 719 | for_each_node(r) { | ||
| 720 | if (alc[r]) | ||
| 721 | lockdep_set_class(&alc[r]->lock, | ||
| 722 | &on_slab_alc_key); | ||
| 723 | } | ||
| 724 | } | ||
| 725 | s++; | ||
| 697 | } | 726 | } |
| 698 | } | 727 | } |
| 699 | |||
| 700 | #else | 728 | #else |
| 701 | static inline void init_lock_keys(struct cache_sizes *s) | 729 | static inline void init_lock_keys(void) |
| 702 | { | 730 | { |
| 703 | } | 731 | } |
| 704 | #endif | 732 | #endif |
| 705 | 733 | ||
| 706 | |||
| 707 | |||
| 708 | /* Guard access to the cache-chain. */ | 734 | /* Guard access to the cache-chain. */ |
| 709 | static DEFINE_MUTEX(cache_chain_mutex); | 735 | static DEFINE_MUTEX(cache_chain_mutex); |
| 710 | static struct list_head cache_chain; | 736 | static struct list_head cache_chain; |
| 711 | 737 | ||
| 712 | /* | 738 | /* |
| 713 | * vm_enough_memory() looks at this to determine how many slab-allocated pages | ||
| 714 | * are possibly freeable under pressure | ||
| 715 | * | ||
| 716 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab | ||
| 717 | */ | ||
| 718 | atomic_t slab_reclaim_pages; | ||
| 719 | |||
| 720 | /* | ||
| 721 | * chicken and egg problem: delay the per-cpu array allocation | 739 | * chicken and egg problem: delay the per-cpu array allocation |
| 722 | * until the general caches are up. | 740 | * until the general caches are up. |
| 723 | */ | 741 | */ |
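The reworked init_lock_keys() in the hunk above walks the malloc_sizes table and gives each cache's per-node list lock, plus every alien array-cache lock, its own lockdep class; the walk stops at the ULONG_MAX sentinel that terminates the table. Below is a minimal userspace sketch of that sentinel-terminated walk; the types and the annotate() helper are invented stand-ins, not kernel code.

    #include <stdio.h>
    #include <limits.h>

    struct fake_cache { const char *name; };
    struct cache_sizes { unsigned long cs_size; struct fake_cache *cs_cachep; };

    static struct fake_cache c32 = { "size-32" }, c64 = { "size-64" };
    static struct cache_sizes malloc_sizes[] = {
        { 32, &c32 },
        { 64, &c64 },
        { ULONG_MAX, NULL },        /* sentinel terminates the walk */
    };

    static void annotate(struct fake_cache *c)
    {
        /* stand-in for lockdep_set_class() on each per-node lock */
        printf("annotating locks of %s\n", c->name);
    }

    int main(void)
    {
        struct cache_sizes *s = malloc_sizes;

        while (s->cs_size != ULONG_MAX) {   /* same loop shape as init_lock_keys() */
            annotate(s->cs_cachep);
            s++;
        }
        return 0;
    }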
| @@ -768,11 +786,10 @@ static inline struct kmem_cache *__find_general_cachep(size_t size, | |||
| 768 | return csizep->cs_cachep; | 786 | return csizep->cs_cachep; |
| 769 | } | 787 | } |
| 770 | 788 | ||
| 771 | struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) | 789 | static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags) |
| 772 | { | 790 | { |
| 773 | return __find_general_cachep(size, gfpflags); | 791 | return __find_general_cachep(size, gfpflags); |
| 774 | } | 792 | } |
| 775 | EXPORT_SYMBOL(kmem_find_general_cachep); | ||
| 776 | 793 | ||
| 777 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) | 794 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) |
| 778 | { | 795 | { |
| @@ -955,7 +972,39 @@ static int transfer_objects(struct array_cache *to, | |||
| 955 | return nr; | 972 | return nr; |
| 956 | } | 973 | } |
| 957 | 974 | ||
| 958 | #ifdef CONFIG_NUMA | 975 | #ifndef CONFIG_NUMA |
| 976 | |||
| 977 | #define drain_alien_cache(cachep, alien) do { } while (0) | ||
| 978 | #define reap_alien(cachep, l3) do { } while (0) | ||
| 979 | |||
| 980 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | ||
| 981 | { | ||
| 982 | return (struct array_cache **)BAD_ALIEN_MAGIC; | ||
| 983 | } | ||
| 984 | |||
| 985 | static inline void free_alien_cache(struct array_cache **ac_ptr) | ||
| 986 | { | ||
| 987 | } | ||
| 988 | |||
| 989 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | ||
| 990 | { | ||
| 991 | return 0; | ||
| 992 | } | ||
| 993 | |||
| 994 | static inline void *alternate_node_alloc(struct kmem_cache *cachep, | ||
| 995 | gfp_t flags) | ||
| 996 | { | ||
| 997 | return NULL; | ||
| 998 | } | ||
| 999 | |||
| 1000 | static inline void *__cache_alloc_node(struct kmem_cache *cachep, | ||
| 1001 | gfp_t flags, int nodeid) | ||
| 1002 | { | ||
| 1003 | return NULL; | ||
| 1004 | } | ||
| 1005 | |||
| 1006 | #else /* CONFIG_NUMA */ | ||
| 1007 | |||
| 959 | static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); | 1008 | static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); |
| 960 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | 1009 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); |
| 961 | 1010 | ||
| @@ -1084,26 +1133,6 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
| 1084 | } | 1133 | } |
| 1085 | return 1; | 1134 | return 1; |
| 1086 | } | 1135 | } |
| 1087 | |||
| 1088 | #else | ||
| 1089 | |||
| 1090 | #define drain_alien_cache(cachep, alien) do { } while (0) | ||
| 1091 | #define reap_alien(cachep, l3) do { } while (0) | ||
| 1092 | |||
| 1093 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | ||
| 1094 | { | ||
| 1095 | return (struct array_cache **) 0x01020304ul; | ||
| 1096 | } | ||
| 1097 | |||
| 1098 | static inline void free_alien_cache(struct array_cache **ac_ptr) | ||
| 1099 | { | ||
| 1100 | } | ||
| 1101 | |||
| 1102 | static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | ||
| 1103 | { | ||
| 1104 | return 0; | ||
| 1105 | } | ||
| 1106 | |||
| 1107 | #endif | 1136 | #endif |
| 1108 | 1137 | ||
| 1109 | static int __cpuinit cpuup_callback(struct notifier_block *nfb, | 1138 | static int __cpuinit cpuup_callback(struct notifier_block *nfb, |
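The hunks above move the !CONFIG_NUMA stubs ahead of the NUMA code and replace the bare 0x01020304ul return of alloc_alien_cache() with the named BAD_ALIEN_MAGIC constant, so callers such as init_lock_keys() can tell "alien caches compiled out" apart from both NULL and a real pointer. A hedged userspace illustration of that sentinel-pointer check follows; only the two names come from the patch, everything else is simplified.

    #include <stdio.h>

    #define BAD_ALIEN_MAGIC 0x01020304ul

    struct array_cache { int dummy; };

    /* what the !NUMA stub does: hand back a recognizable poison value */
    static struct array_cache **alloc_alien_cache_stub(void)
    {
        return (struct array_cache **)BAD_ALIEN_MAGIC;
    }

    static void annotate_alien(struct array_cache **alc)
    {
        /* callers must check for both "not allocated" and "stubbed out" */
        if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) {
            printf("no alien caches to annotate\n");
            return;
        }
        printf("would annotate real alien caches at %p\n", (void *)alc);
    }

    int main(void)
    {
        annotate_alien(alloc_alien_cache_stub());
        annotate_alien(NULL);
        return 0;
    }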
| @@ -1422,7 +1451,6 @@ void __init kmem_cache_init(void) | |||
| 1422 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, | 1451 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1423 | NULL, NULL); | 1452 | NULL, NULL); |
| 1424 | } | 1453 | } |
| 1425 | init_lock_keys(sizes); | ||
| 1426 | 1454 | ||
| 1427 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1455 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
| 1428 | sizes->cs_size, | 1456 | sizes->cs_size, |
| @@ -1491,10 +1519,15 @@ void __init kmem_cache_init(void) | |||
| 1491 | struct kmem_cache *cachep; | 1519 | struct kmem_cache *cachep; |
| 1492 | mutex_lock(&cache_chain_mutex); | 1520 | mutex_lock(&cache_chain_mutex); |
| 1493 | list_for_each_entry(cachep, &cache_chain, next) | 1521 | list_for_each_entry(cachep, &cache_chain, next) |
| 1494 | enable_cpucache(cachep); | 1522 | if (enable_cpucache(cachep)) |
| 1523 | BUG(); | ||
| 1495 | mutex_unlock(&cache_chain_mutex); | 1524 | mutex_unlock(&cache_chain_mutex); |
| 1496 | } | 1525 | } |
| 1497 | 1526 | ||
| 1527 | /* Annotate slab for lockdep -- annotate the malloc caches */ | ||
| 1528 | init_lock_keys(); | ||
| 1529 | |||
| 1530 | |||
| 1498 | /* Done! */ | 1531 | /* Done! */ |
| 1499 | g_cpucache_up = FULL; | 1532 | g_cpucache_up = FULL; |
| 1500 | 1533 | ||
| @@ -1543,7 +1576,13 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1543 | */ | 1576 | */ |
| 1544 | flags |= __GFP_COMP; | 1577 | flags |= __GFP_COMP; |
| 1545 | #endif | 1578 | #endif |
| 1546 | flags |= cachep->gfpflags; | 1579 | |
| 1580 | /* | ||
| 1581 | * Under NUMA we want memory on the indicated node. We will handle | ||
| 1582 | * the needed fallback ourselves since we want to serve from our | ||
| 1583 | * per node object lists first for other nodes. | ||
| 1584 | */ | ||
| 1585 | flags |= cachep->gfpflags | GFP_THISNODE; | ||
| 1547 | 1586 | ||
| 1548 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); | 1587 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); |
| 1549 | if (!page) | 1588 | if (!page) |
| @@ -1551,8 +1590,11 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1551 | 1590 | ||
| 1552 | nr_pages = (1 << cachep->gfporder); | 1591 | nr_pages = (1 << cachep->gfporder); |
| 1553 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1592 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| 1554 | atomic_add(nr_pages, &slab_reclaim_pages); | 1593 | add_zone_page_state(page_zone(page), |
| 1555 | add_zone_page_state(page_zone(page), NR_SLAB, nr_pages); | 1594 | NR_SLAB_RECLAIMABLE, nr_pages); |
| 1595 | else | ||
| 1596 | add_zone_page_state(page_zone(page), | ||
| 1597 | NR_SLAB_UNRECLAIMABLE, nr_pages); | ||
| 1556 | for (i = 0; i < nr_pages; i++) | 1598 | for (i = 0; i < nr_pages; i++) |
| 1557 | __SetPageSlab(page + i); | 1599 | __SetPageSlab(page + i); |
| 1558 | return page_address(page); | 1600 | return page_address(page); |
| @@ -1567,7 +1609,12 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1567 | struct page *page = virt_to_page(addr); | 1609 | struct page *page = virt_to_page(addr); |
| 1568 | const unsigned long nr_freed = i; | 1610 | const unsigned long nr_freed = i; |
| 1569 | 1611 | ||
| 1570 | sub_zone_page_state(page_zone(page), NR_SLAB, nr_freed); | 1612 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
| 1613 | sub_zone_page_state(page_zone(page), | ||
| 1614 | NR_SLAB_RECLAIMABLE, nr_freed); | ||
| 1615 | else | ||
| 1616 | sub_zone_page_state(page_zone(page), | ||
| 1617 | NR_SLAB_UNRECLAIMABLE, nr_freed); | ||
| 1571 | while (i--) { | 1618 | while (i--) { |
| 1572 | BUG_ON(!PageSlab(page)); | 1619 | BUG_ON(!PageSlab(page)); |
| 1573 | __ClearPageSlab(page); | 1620 | __ClearPageSlab(page); |
| @@ -1576,8 +1623,6 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1576 | if (current->reclaim_state) | 1623 | if (current->reclaim_state) |
| 1577 | current->reclaim_state->reclaimed_slab += nr_freed; | 1624 | current->reclaim_state->reclaimed_slab += nr_freed; |
| 1578 | free_pages((unsigned long)addr, cachep->gfporder); | 1625 | free_pages((unsigned long)addr, cachep->gfporder); |
| 1579 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | ||
| 1580 | atomic_sub(1 << cachep->gfporder, &slab_reclaim_pages); | ||
| 1581 | } | 1626 | } |
| 1582 | 1627 | ||
| 1583 | static void kmem_rcu_free(struct rcu_head *head) | 1628 | static void kmem_rcu_free(struct rcu_head *head) |
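The two hunks above drop the global slab_reclaim_pages counter and account slab pages per zone instead: kmem_getpages() adds the pages to NR_SLAB_RECLAIMABLE or NR_SLAB_UNRECLAIMABLE depending on SLAB_RECLAIM_ACCOUNT, and kmem_freepages() subtracts from the same counter. A small sketch of that paired bookkeeping with plain counters (the zone and vmstat machinery is deliberately left out):

    #include <assert.h>

    #define SLAB_RECLAIM_ACCOUNT 0x1u    /* illustrative flag value */

    static long nr_slab_reclaimable;
    static long nr_slab_unreclaimable;

    static void account_alloc(unsigned int cache_flags, long nr_pages)
    {
        if (cache_flags & SLAB_RECLAIM_ACCOUNT)
            nr_slab_reclaimable += nr_pages;
        else
            nr_slab_unreclaimable += nr_pages;
    }

    static void account_free(unsigned int cache_flags, long nr_pages)
    {
        /* must mirror account_alloc() or the counters drift */
        if (cache_flags & SLAB_RECLAIM_ACCOUNT)
            nr_slab_reclaimable -= nr_pages;
        else
            nr_slab_unreclaimable -= nr_pages;
    }

    int main(void)
    {
        account_alloc(SLAB_RECLAIM_ACCOUNT, 4);
        account_alloc(0, 2);
        account_free(SLAB_RECLAIM_ACCOUNT, 4);
        account_free(0, 2);
        assert(nr_slab_reclaimable == 0 && nr_slab_unreclaimable == 0);
        return 0;
    }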
| @@ -1834,6 +1879,27 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) | |||
| 1834 | } | 1879 | } |
| 1835 | } | 1880 | } |
| 1836 | 1881 | ||
| 1882 | static void __kmem_cache_destroy(struct kmem_cache *cachep) | ||
| 1883 | { | ||
| 1884 | int i; | ||
| 1885 | struct kmem_list3 *l3; | ||
| 1886 | |||
| 1887 | for_each_online_cpu(i) | ||
| 1888 | kfree(cachep->array[i]); | ||
| 1889 | |||
| 1890 | /* NUMA: free the list3 structures */ | ||
| 1891 | for_each_online_node(i) { | ||
| 1892 | l3 = cachep->nodelists[i]; | ||
| 1893 | if (l3) { | ||
| 1894 | kfree(l3->shared); | ||
| 1895 | free_alien_cache(l3->alien); | ||
| 1896 | kfree(l3); | ||
| 1897 | } | ||
| 1898 | } | ||
| 1899 | kmem_cache_free(&cache_cache, cachep); | ||
| 1900 | } | ||
| 1901 | |||
| 1902 | |||
| 1837 | /** | 1903 | /** |
| 1838 | * calculate_slab_order - calculate size (page order) of slabs | 1904 | * calculate_slab_order - calculate size (page order) of slabs |
| 1839 | * @cachep: pointer to the cache that is being created | 1905 | * @cachep: pointer to the cache that is being created |
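The new __kmem_cache_destroy() above gathers the per-cpu array and kmem_list3 teardown into one helper, so that kmem_cache_destroy() and the kmem_cache_create() failure path later in this diff can release a partially built cache identically. A generic sketch of sharing one teardown routine between the error path and normal destruction; the structure, field names and sizes are invented:

    #include <stdlib.h>

    struct fake_cache {
        void *percpu[4];
        void *nodelist;
    };

    static void fake_cache_teardown(struct fake_cache *c)   /* ~ __kmem_cache_destroy() */
    {
        for (int i = 0; i < 4; i++)
            free(c->percpu[i]);     /* free(NULL) is harmless for missing parts */
        free(c->nodelist);
        free(c);
    }

    static struct fake_cache *fake_cache_create(void)
    {
        struct fake_cache *c = calloc(1, sizeof(*c));

        if (!c)
            return NULL;
        for (int i = 0; i < 4; i++) {
            c->percpu[i] = malloc(64);
            if (!c->percpu[i])
                goto fail;
        }
        c->nodelist = malloc(128);
        if (!c->nodelist)
            goto fail;
        return c;
    fail:
        fake_cache_teardown(c);     /* same helper as the destroy path */
        return NULL;
    }

    int main(void)
    {
        struct fake_cache *c = fake_cache_create();

        if (c)
            fake_cache_teardown(c);
        return 0;
    }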
| @@ -1904,12 +1970,11 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1904 | return left_over; | 1970 | return left_over; |
| 1905 | } | 1971 | } |
| 1906 | 1972 | ||
| 1907 | static void setup_cpu_cache(struct kmem_cache *cachep) | 1973 | static int setup_cpu_cache(struct kmem_cache *cachep) |
| 1908 | { | 1974 | { |
| 1909 | if (g_cpucache_up == FULL) { | 1975 | if (g_cpucache_up == FULL) |
| 1910 | enable_cpucache(cachep); | 1976 | return enable_cpucache(cachep); |
| 1911 | return; | 1977 | |
| 1912 | } | ||
| 1913 | if (g_cpucache_up == NONE) { | 1978 | if (g_cpucache_up == NONE) { |
| 1914 | /* | 1979 | /* |
| 1915 | * Note: the first kmem_cache_create must create the cache | 1980 | * Note: the first kmem_cache_create must create the cache |
| @@ -1956,6 +2021,7 @@ static void setup_cpu_cache(struct kmem_cache *cachep) | |||
| 1956 | cpu_cache_get(cachep)->touched = 0; | 2021 | cpu_cache_get(cachep)->touched = 0; |
| 1957 | cachep->batchcount = 1; | 2022 | cachep->batchcount = 1; |
| 1958 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | 2023 | cachep->limit = BOOT_CPUCACHE_ENTRIES; |
| 2024 | return 0; | ||
| 1959 | } | 2025 | } |
| 1960 | 2026 | ||
| 1961 | /** | 2027 | /** |
| @@ -2097,6 +2163,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2097 | } else { | 2163 | } else { |
| 2098 | ralign = BYTES_PER_WORD; | 2164 | ralign = BYTES_PER_WORD; |
| 2099 | } | 2165 | } |
| 2166 | |||
| 2167 | /* | ||
| 2168 | * Redzoning and user store require word alignment. Note this will be | ||
| 2169 | * overridden by architecture or caller mandated alignment if either | ||
| 2170 | * is greater than BYTES_PER_WORD. | ||
| 2171 | */ | ||
| 2172 | if (flags & SLAB_RED_ZONE || flags & SLAB_STORE_USER) | ||
| 2173 | ralign = BYTES_PER_WORD; | ||
| 2174 | |||
| 2100 | /* 2) arch mandated alignment: disables debug if necessary */ | 2175 | /* 2) arch mandated alignment: disables debug if necessary */ |
| 2101 | if (ralign < ARCH_SLAB_MINALIGN) { | 2176 | if (ralign < ARCH_SLAB_MINALIGN) { |
| 2102 | ralign = ARCH_SLAB_MINALIGN; | 2177 | ralign = ARCH_SLAB_MINALIGN; |
| @@ -2110,8 +2185,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2110 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2185 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
| 2111 | } | 2186 | } |
| 2112 | /* | 2187 | /* |
| 2113 | * 4) Store it. Note that the debug code below can reduce | 2188 | * 4) Store it. |
| 2114 | * the alignment to BYTES_PER_WORD. | ||
| 2115 | */ | 2189 | */ |
| 2116 | align = ralign; | 2190 | align = ralign; |
| 2117 | 2191 | ||
| @@ -2123,20 +2197,19 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2123 | #if DEBUG | 2197 | #if DEBUG |
| 2124 | cachep->obj_size = size; | 2198 | cachep->obj_size = size; |
| 2125 | 2199 | ||
| 2200 | /* | ||
| 2201 | * Both debugging options require word-alignment which is calculated | ||
| 2202 | * into align above. | ||
| 2203 | */ | ||
| 2126 | if (flags & SLAB_RED_ZONE) { | 2204 | if (flags & SLAB_RED_ZONE) { |
| 2127 | /* redzoning only works with word aligned caches */ | ||
| 2128 | align = BYTES_PER_WORD; | ||
| 2129 | |||
| 2130 | /* add space for red zone words */ | 2205 | /* add space for red zone words */ |
| 2131 | cachep->obj_offset += BYTES_PER_WORD; | 2206 | cachep->obj_offset += BYTES_PER_WORD; |
| 2132 | size += 2 * BYTES_PER_WORD; | 2207 | size += 2 * BYTES_PER_WORD; |
| 2133 | } | 2208 | } |
| 2134 | if (flags & SLAB_STORE_USER) { | 2209 | if (flags & SLAB_STORE_USER) { |
| 2135 | /* user store requires word alignment and | 2210 | /* user store requires one word storage behind the end of |
| 2136 | * one word storage behind the end of the real | 2211 | * the real object. |
| 2137 | * object. | ||
| 2138 | */ | 2212 | */ |
| 2139 | align = BYTES_PER_WORD; | ||
| 2140 | size += BYTES_PER_WORD; | 2213 | size += BYTES_PER_WORD; |
| 2141 | } | 2214 | } |
| 2142 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) | 2215 | #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) |
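In the hunks above, the word alignment required by SLAB_RED_ZONE and SLAB_STORE_USER is folded into ralign before the arch- and caller-mandated alignments are applied, instead of being forced back to BYTES_PER_WORD inside the debug block and silently shrinking a larger alignment. The effective rule is "take the largest of the three requirements"; a compact sketch of that computation, with guessed flag values, is below.

    #include <stdio.h>

    #define BYTES_PER_WORD   sizeof(void *)
    #define SLAB_RED_ZONE    0x1u   /* illustrative flag values */
    #define SLAB_STORE_USER  0x2u

    static size_t cache_align(size_t arch_min, size_t caller_align, unsigned int flags)
    {
        size_t ralign = BYTES_PER_WORD;

        /* debugging options need at least word alignment ... */
        if (flags & (SLAB_RED_ZONE | SLAB_STORE_USER))
            ralign = BYTES_PER_WORD;
        /* ... but never override a larger arch or caller requirement */
        if (ralign < arch_min)
            ralign = arch_min;
        if (ralign < caller_align)
            ralign = caller_align;
        return ralign;
    }

    int main(void)
    {
        printf("%zu\n", cache_align(8, 32, SLAB_RED_ZONE));  /* caller wins: 32 */
        return 0;
    }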
| @@ -2200,14 +2273,26 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2200 | cachep->gfpflags |= GFP_DMA; | 2273 | cachep->gfpflags |= GFP_DMA; |
| 2201 | cachep->buffer_size = size; | 2274 | cachep->buffer_size = size; |
| 2202 | 2275 | ||
| 2203 | if (flags & CFLGS_OFF_SLAB) | 2276 | if (flags & CFLGS_OFF_SLAB) { |
| 2204 | cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); | 2277 | cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); |
| 2278 | /* | ||
| 2279 | * This is a possibility for one of the malloc_sizes caches. | ||
| 2280 | * But since we go off slab only for object size greater than | ||
| 2281 | * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, | ||
| 2282 | * this should not happen at all. | ||
| 2283 | * But leave a BUG_ON for some lucky dude. | ||
| 2284 | */ | ||
| 2285 | BUG_ON(!cachep->slabp_cache); | ||
| 2286 | } | ||
| 2205 | cachep->ctor = ctor; | 2287 | cachep->ctor = ctor; |
| 2206 | cachep->dtor = dtor; | 2288 | cachep->dtor = dtor; |
| 2207 | cachep->name = name; | 2289 | cachep->name = name; |
| 2208 | 2290 | ||
| 2209 | 2291 | if (setup_cpu_cache(cachep)) { | |
| 2210 | setup_cpu_cache(cachep); | 2292 | __kmem_cache_destroy(cachep); |
| 2293 | cachep = NULL; | ||
| 2294 | goto oops; | ||
| 2295 | } | ||
| 2211 | 2296 | ||
| 2212 | /* cache setup completed, link it into the list */ | 2297 | /* cache setup completed, link it into the list */ |
| 2213 | list_add(&cachep->next, &cache_chain); | 2298 | list_add(&cachep->next, &cache_chain); |
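setup_cpu_cache() and enable_cpucache() now return an error code, and kmem_cache_create() above reacts to a failure by calling __kmem_cache_destroy() and returning NULL rather than leaving a half-initialised cache on the chain. A hedged sketch of that propagate-and-unwind shape; the helpers and sizes are placeholders:

    #include <stdio.h>
    #include <stdlib.h>
    #include <errno.h>

    struct fake_cache { void *cpu_cache; };

    /* was "void setup(...)"; returning an error lets the caller unwind */
    static int fake_setup_cpu_cache(struct fake_cache *c)
    {
        c->cpu_cache = malloc(256);
        return c->cpu_cache ? 0 : -ENOMEM;
    }

    static struct fake_cache *fake_create(void)
    {
        struct fake_cache *c = calloc(1, sizeof(*c));

        if (!c)
            return NULL;
        if (fake_setup_cpu_cache(c)) {
            free(c);                /* ~ __kmem_cache_destroy() + "goto oops" */
            return NULL;
        }
        return c;
    }

    int main(void)
    {
        struct fake_cache *c = fake_create();

        if (!c) {
            fprintf(stderr, "cache creation failed\n");
        } else {
            free(c->cpu_cache);
            free(c);
        }
        return 0;
    }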
| @@ -2375,7 +2460,6 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
| 2375 | * @cachep: the cache to destroy | 2460 | * @cachep: the cache to destroy |
| 2376 | * | 2461 | * |
| 2377 | * Remove a struct kmem_cache object from the slab cache. | 2462 | * Remove a struct kmem_cache object from the slab cache. |
| 2378 | * Returns 0 on success. | ||
| 2379 | * | 2463 | * |
| 2380 | * It is expected this function will be called by a module when it is | 2464 | * It is expected this function will be called by a module when it is |
| 2381 | * unloaded. This will remove the cache completely, and avoid a duplicate | 2465 | * unloaded. This will remove the cache completely, and avoid a duplicate |
| @@ -2387,11 +2471,8 @@ EXPORT_SYMBOL(kmem_cache_shrink); | |||
| 2387 | * The caller must guarantee that no one will allocate memory from the cache | 2471 | * The caller must guarantee that no one will allocate memory from the cache |
| 2388 | * during the kmem_cache_destroy(). | 2472 | * during the kmem_cache_destroy(). |
| 2389 | */ | 2473 | */ |
| 2390 | int kmem_cache_destroy(struct kmem_cache *cachep) | 2474 | void kmem_cache_destroy(struct kmem_cache *cachep) |
| 2391 | { | 2475 | { |
| 2392 | int i; | ||
| 2393 | struct kmem_list3 *l3; | ||
| 2394 | |||
| 2395 | BUG_ON(!cachep || in_interrupt()); | 2476 | BUG_ON(!cachep || in_interrupt()); |
| 2396 | 2477 | ||
| 2397 | /* Don't let CPUs to come and go */ | 2478 | /* Don't let CPUs to come and go */ |
| @@ -2411,31 +2492,28 @@ int kmem_cache_destroy(struct kmem_cache *cachep) | |||
| 2411 | list_add(&cachep->next, &cache_chain); | 2492 | list_add(&cachep->next, &cache_chain); |
| 2412 | mutex_unlock(&cache_chain_mutex); | 2493 | mutex_unlock(&cache_chain_mutex); |
| 2413 | unlock_cpu_hotplug(); | 2494 | unlock_cpu_hotplug(); |
| 2414 | return 1; | 2495 | return; |
| 2415 | } | 2496 | } |
| 2416 | 2497 | ||
| 2417 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) | 2498 | if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) |
| 2418 | synchronize_rcu(); | 2499 | synchronize_rcu(); |
| 2419 | 2500 | ||
| 2420 | for_each_online_cpu(i) | 2501 | __kmem_cache_destroy(cachep); |
| 2421 | kfree(cachep->array[i]); | ||
| 2422 | |||
| 2423 | /* NUMA: free the list3 structures */ | ||
| 2424 | for_each_online_node(i) { | ||
| 2425 | l3 = cachep->nodelists[i]; | ||
| 2426 | if (l3) { | ||
| 2427 | kfree(l3->shared); | ||
| 2428 | free_alien_cache(l3->alien); | ||
| 2429 | kfree(l3); | ||
| 2430 | } | ||
| 2431 | } | ||
| 2432 | kmem_cache_free(&cache_cache, cachep); | ||
| 2433 | unlock_cpu_hotplug(); | 2502 | unlock_cpu_hotplug(); |
| 2434 | return 0; | ||
| 2435 | } | 2503 | } |
| 2436 | EXPORT_SYMBOL(kmem_cache_destroy); | 2504 | EXPORT_SYMBOL(kmem_cache_destroy); |
| 2437 | 2505 | ||
| 2438 | /* Get the memory for a slab management obj. */ | 2506 | /* |
| 2507 | * Get the memory for a slab management obj. | ||
| 2508 | * For a slab cache when the slab descriptor is off-slab, slab descriptors | ||
| 2509 | * always come from malloc_sizes caches. The slab descriptor cannot | ||
| 2510 | * come from the same cache that is being created because, | ||
| 2511 | * when we search for an appropriate cache for these | ||
| 2512 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. | ||
| 2513 | * If we are creating a malloc_sizes cache here it would not be visible to | ||
| 2514 | * kmem_find_general_cachep till the initialization is complete. | ||
| 2515 | * Hence slabp_cache cannot be the same as the original cache. | ||
| 2516 | */ | ||
| 2439 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | 2517 | static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, |
| 2440 | int colour_off, gfp_t local_flags, | 2518 | int colour_off, gfp_t local_flags, |
| 2441 | int nodeid) | 2519 | int nodeid) |
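The comment block added above alloc_slabmgmt() records why an off-slab slab descriptor can never be carved out of the cache it describes: off-slab management is only chosen for comparatively large objects (greater than PAGE_SIZE/8), while the descriptor itself is small, so kmem_find_general_cachep() always lands in a smaller malloc_sizes cache. A rough illustration of that size relationship, assuming 4 KiB pages and an invented descriptor layout:

    #include <stdio.h>

    #define PAGE_SIZE 4096u     /* assumption: 4 KiB pages */

    /* off-slab management kicks in for "large" objects only */
    static int off_slab(size_t obj_size)
    {
        return obj_size >= PAGE_SIZE / 8;
    }

    /* rough size of a descriptor for n objects (header + per-object index) */
    static size_t slab_mgmt_size(size_t nr_objs)
    {
        return 32 + nr_objs * sizeof(unsigned int);   /* invented header size */
    }

    int main(void)
    {
        size_t obj = 1024;                 /* an off-slab candidate */
        size_t nr_objs = PAGE_SIZE / obj;
        size_t mgmt = slab_mgmt_size(nr_objs);

        printf("object size %zu -> off-slab=%d, descriptor ~%zu bytes\n",
               obj, off_slab(obj), mgmt);
        /* the ~48-byte descriptor is far below the 512-byte off-slab threshold,
         * so it is served by a smaller general cache, never by the object's own cache */
        return 0;
    }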
| @@ -2968,14 +3046,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 2968 | void *objp; | 3046 | void *objp; |
| 2969 | struct array_cache *ac; | 3047 | struct array_cache *ac; |
| 2970 | 3048 | ||
| 2971 | #ifdef CONFIG_NUMA | ||
| 2972 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { | ||
| 2973 | objp = alternate_node_alloc(cachep, flags); | ||
| 2974 | if (objp != NULL) | ||
| 2975 | return objp; | ||
| 2976 | } | ||
| 2977 | #endif | ||
| 2978 | |||
| 2979 | check_irq_off(); | 3049 | check_irq_off(); |
| 2980 | ac = cpu_cache_get(cachep); | 3050 | ac = cpu_cache_get(cachep); |
| 2981 | if (likely(ac->avail)) { | 3051 | if (likely(ac->avail)) { |
| @@ -2993,12 +3063,24 @@ static __always_inline void *__cache_alloc(struct kmem_cache *cachep, | |||
| 2993 | gfp_t flags, void *caller) | 3063 | gfp_t flags, void *caller) |
| 2994 | { | 3064 | { |
| 2995 | unsigned long save_flags; | 3065 | unsigned long save_flags; |
| 2996 | void *objp; | 3066 | void *objp = NULL; |
| 2997 | 3067 | ||
| 2998 | cache_alloc_debugcheck_before(cachep, flags); | 3068 | cache_alloc_debugcheck_before(cachep, flags); |
| 2999 | 3069 | ||
| 3000 | local_irq_save(save_flags); | 3070 | local_irq_save(save_flags); |
| 3001 | objp = ____cache_alloc(cachep, flags); | 3071 | |
| 3072 | if (unlikely(NUMA_BUILD && | ||
| 3073 | current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) | ||
| 3074 | objp = alternate_node_alloc(cachep, flags); | ||
| 3075 | |||
| 3076 | if (!objp) | ||
| 3077 | objp = ____cache_alloc(cachep, flags); | ||
| 3078 | /* | ||
| 3079 | * We may just have run out of memory on the local node. | ||
| 3080 | * __cache_alloc_node() knows how to locate memory on other nodes | ||
| 3081 | */ | ||
| 3082 | if (NUMA_BUILD && !objp) | ||
| 3083 | objp = __cache_alloc_node(cachep, flags, numa_node_id()); | ||
| 3002 | local_irq_restore(save_flags); | 3084 | local_irq_restore(save_flags); |
| 3003 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, | 3085 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, |
| 3004 | caller); | 3086 | caller); |
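The __cache_alloc() rework above makes the NUMA decision in one place with interrupts off: honour PF_SPREAD_SLAB/PF_MEMPOLICY via alternate_node_alloc() first, then try the local per-cpu array, and only if both come up empty call __cache_alloc_node() for the local node, which may fall back further. A compact sketch of that try-in-order control flow; the three helpers are stand-ins that simply simulate the outcomes:

    #include <stdio.h>

    static void *try_policy_node(void)      { return NULL; } /* e.g. no mempolicy set */
    static void *try_local_cpu_cache(void)  { return NULL; } /* e.g. per-cpu array empty */
    static void *try_local_node(void)       { static int obj; return &obj; }

    static void *cache_alloc(void)
    {
        void *objp;

        objp = try_policy_node();              /* ~ alternate_node_alloc() */
        if (!objp)
            objp = try_local_cpu_cache();      /* ~ ____cache_alloc() */
        if (!objp)
            objp = try_local_node();           /* ~ __cache_alloc_node(..., numa_node_id()) */
        return objp;
    }

    int main(void)
    {
        printf("got %p\n", cache_alloc());
        return 0;
    }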
| @@ -3017,7 +3099,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 3017 | { | 3099 | { |
| 3018 | int nid_alloc, nid_here; | 3100 | int nid_alloc, nid_here; |
| 3019 | 3101 | ||
| 3020 | if (in_interrupt()) | 3102 | if (in_interrupt() || (flags & __GFP_THISNODE)) |
| 3021 | return NULL; | 3103 | return NULL; |
| 3022 | nid_alloc = nid_here = numa_node_id(); | 3104 | nid_alloc = nid_here = numa_node_id(); |
| 3023 | if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) | 3105 | if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) |
| @@ -3030,6 +3112,28 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 3030 | } | 3112 | } |
| 3031 | 3113 | ||
| 3032 | /* | 3114 | /* |
| 3115 | * Fallback function if there was no memory available and no objects on a | ||
| 3116 | * certain node and we are allowed to fall back. We mimic the behavior of | ||
| 3117 | * the page allocator. We fall back according to a zonelist determined by | ||
| 3118 | * the policy layer while obeying cpuset constraints. | ||
| 3119 | */ | ||
| 3120 | void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) | ||
| 3121 | { | ||
| 3122 | struct zonelist *zonelist = &NODE_DATA(slab_node(current->mempolicy)) | ||
| 3123 | ->node_zonelists[gfp_zone(flags)]; | ||
| 3124 | struct zone **z; | ||
| 3125 | void *obj = NULL; | ||
| 3126 | |||
| 3127 | for (z = zonelist->zones; *z && !obj; z++) | ||
| 3128 | if (zone_idx(*z) <= ZONE_NORMAL && | ||
| 3129 | cpuset_zone_allowed(*z, flags)) | ||
| 3130 | obj = __cache_alloc_node(cache, | ||
| 3131 | flags | __GFP_THISNODE, | ||
| 3132 | zone_to_nid(*z)); | ||
| 3133 | return obj; | ||
| 3134 | } | ||
| 3135 | |||
| 3136 | /* | ||
| 3033 | * An interface to enable slab creation on nodeid | 3137 | * An interface to enable slab creation on nodeid |
| 3034 | */ | 3138 | */ |
| 3035 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, | 3139 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
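fallback_alloc() above mimics the page allocator: it walks the zonelist that the mempolicy layer would use, skips zones above ZONE_NORMAL or forbidden by the cpuset, and takes the first node that can satisfy a __GFP_THISNODE allocation. A simplified walk over an ordered candidate list, with invented node data standing in for zones:

    #include <stdio.h>

    struct node { int id; int allowed; int free_objs; };

    /* nodes in the order the policy layer would try them */
    static struct node zonelist[] = {
        { 0, 1, 0 },   /* local node: allowed but empty */
        { 1, 0, 5 },   /* forbidden by the cpuset */
        { 2, 1, 3 },   /* first usable fallback */
    };

    static void *alloc_on_node(struct node *n)
    {
        static int obj;

        if (!n->free_objs)
            return NULL;
        n->free_objs--;
        return &obj;
    }

    static void *fallback_alloc(void)
    {
        void *obj = NULL;

        for (size_t i = 0; i < sizeof(zonelist) / sizeof(zonelist[0]) && !obj; i++) {
            if (!zonelist[i].allowed)          /* ~ cpuset_zone_allowed() */
                continue;
            obj = alloc_on_node(&zonelist[i]); /* ~ __cache_alloc_node(__GFP_THISNODE) */
        }
        return obj;
    }

    int main(void)
    {
        printf("fallback got an object from node 2? %s\n",
               fallback_alloc() ? "yes" : "no");
        return 0;
    }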
| @@ -3082,11 +3186,15 @@ retry: | |||
| 3082 | must_grow: | 3186 | must_grow: |
| 3083 | spin_unlock(&l3->list_lock); | 3187 | spin_unlock(&l3->list_lock); |
| 3084 | x = cache_grow(cachep, flags, nodeid); | 3188 | x = cache_grow(cachep, flags, nodeid); |
| 3189 | if (x) | ||
| 3190 | goto retry; | ||
| 3085 | 3191 | ||
| 3086 | if (!x) | 3192 | if (!(flags & __GFP_THISNODE)) |
| 3087 | return NULL; | 3193 | /* Unable to grow the cache. Fall back to other nodes. */ |
| 3194 | return fallback_alloc(cachep, flags); | ||
| 3195 | |||
| 3196 | return NULL; | ||
| 3088 | 3197 | ||
| 3089 | goto retry; | ||
| 3090 | done: | 3198 | done: |
| 3091 | return obj; | 3199 | return obj; |
| 3092 | } | 3200 | } |
| @@ -3119,6 +3227,12 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | |||
| 3119 | if (slabp->inuse == 0) { | 3227 | if (slabp->inuse == 0) { |
| 3120 | if (l3->free_objects > l3->free_limit) { | 3228 | if (l3->free_objects > l3->free_limit) { |
| 3121 | l3->free_objects -= cachep->num; | 3229 | l3->free_objects -= cachep->num; |
| 3230 | /* No need to drop any previously held | ||
| 3231 | * lock here, even if we have an off-slab slab | ||
| 3232 | * descriptor it is guaranteed to come from | ||
| 3233 | * a different cache, refer to comments before | ||
| 3234 | * alloc_slabmgmt. | ||
| 3235 | */ | ||
| 3122 | slab_destroy(cachep, slabp); | 3236 | slab_destroy(cachep, slabp); |
| 3123 | } else { | 3237 | } else { |
| 3124 | list_add(&slabp->list, &l3->slabs_free); | 3238 | list_add(&slabp->list, &l3->slabs_free); |
| @@ -3317,7 +3431,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 3317 | } | 3431 | } |
| 3318 | EXPORT_SYMBOL(kmem_cache_alloc_node); | 3432 | EXPORT_SYMBOL(kmem_cache_alloc_node); |
| 3319 | 3433 | ||
| 3320 | void *kmalloc_node(size_t size, gfp_t flags, int node) | 3434 | void *__kmalloc_node(size_t size, gfp_t flags, int node) |
| 3321 | { | 3435 | { |
| 3322 | struct kmem_cache *cachep; | 3436 | struct kmem_cache *cachep; |
| 3323 | 3437 | ||
| @@ -3326,7 +3440,7 @@ void *kmalloc_node(size_t size, gfp_t flags, int node) | |||
| 3326 | return NULL; | 3440 | return NULL; |
| 3327 | return kmem_cache_alloc_node(cachep, flags, node); | 3441 | return kmem_cache_alloc_node(cachep, flags, node); |
| 3328 | } | 3442 | } |
| 3329 | EXPORT_SYMBOL(kmalloc_node); | 3443 | EXPORT_SYMBOL(__kmalloc_node); |
| 3330 | #endif | 3444 | #endif |
| 3331 | 3445 | ||
| 3332 | /** | 3446 | /** |
| @@ -3370,55 +3484,6 @@ void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) | |||
| 3370 | EXPORT_SYMBOL(__kmalloc_track_caller); | 3484 | EXPORT_SYMBOL(__kmalloc_track_caller); |
| 3371 | #endif | 3485 | #endif |
| 3372 | 3486 | ||
| 3373 | #ifdef CONFIG_SMP | ||
| 3374 | /** | ||
| 3375 | * __alloc_percpu - allocate one copy of the object for every present | ||
| 3376 | * cpu in the system, zeroing them. | ||
| 3377 | * Objects should be dereferenced using the per_cpu_ptr macro only. | ||
| 3378 | * | ||
| 3379 | * @size: how many bytes of memory are required. | ||
| 3380 | */ | ||
| 3381 | void *__alloc_percpu(size_t size) | ||
| 3382 | { | ||
| 3383 | int i; | ||
| 3384 | struct percpu_data *pdata = kmalloc(sizeof(*pdata), GFP_KERNEL); | ||
| 3385 | |||
| 3386 | if (!pdata) | ||
| 3387 | return NULL; | ||
| 3388 | |||
| 3389 | /* | ||
| 3390 | * Cannot use for_each_online_cpu since a cpu may come online | ||
| 3391 | * and we have no way of figuring out how to fix the array | ||
| 3392 | * that we have allocated then.... | ||
| 3393 | */ | ||
| 3394 | for_each_possible_cpu(i) { | ||
| 3395 | int node = cpu_to_node(i); | ||
| 3396 | |||
| 3397 | if (node_online(node)) | ||
| 3398 | pdata->ptrs[i] = kmalloc_node(size, GFP_KERNEL, node); | ||
| 3399 | else | ||
| 3400 | pdata->ptrs[i] = kmalloc(size, GFP_KERNEL); | ||
| 3401 | |||
| 3402 | if (!pdata->ptrs[i]) | ||
| 3403 | goto unwind_oom; | ||
| 3404 | memset(pdata->ptrs[i], 0, size); | ||
| 3405 | } | ||
| 3406 | |||
| 3407 | /* Catch derefs w/o wrappers */ | ||
| 3408 | return (void *)(~(unsigned long)pdata); | ||
| 3409 | |||
| 3410 | unwind_oom: | ||
| 3411 | while (--i >= 0) { | ||
| 3412 | if (!cpu_possible(i)) | ||
| 3413 | continue; | ||
| 3414 | kfree(pdata->ptrs[i]); | ||
| 3415 | } | ||
| 3416 | kfree(pdata); | ||
| 3417 | return NULL; | ||
| 3418 | } | ||
| 3419 | EXPORT_SYMBOL(__alloc_percpu); | ||
| 3420 | #endif | ||
| 3421 | |||
| 3422 | /** | 3487 | /** |
| 3423 | * kmem_cache_free - Deallocate an object | 3488 | * kmem_cache_free - Deallocate an object |
| 3424 | * @cachep: The cache the allocation was from. | 3489 | * @cachep: The cache the allocation was from. |
| @@ -3464,29 +3529,6 @@ void kfree(const void *objp) | |||
| 3464 | } | 3529 | } |
| 3465 | EXPORT_SYMBOL(kfree); | 3530 | EXPORT_SYMBOL(kfree); |
| 3466 | 3531 | ||
| 3467 | #ifdef CONFIG_SMP | ||
| 3468 | /** | ||
| 3469 | * free_percpu - free previously allocated percpu memory | ||
| 3470 | * @objp: pointer returned by alloc_percpu. | ||
| 3471 | * | ||
| 3472 | * Don't free memory not originally allocated by alloc_percpu() | ||
| 3473 | * The complemented objp is to check for that. | ||
| 3474 | */ | ||
| 3475 | void free_percpu(const void *objp) | ||
| 3476 | { | ||
| 3477 | int i; | ||
| 3478 | struct percpu_data *p = (struct percpu_data *)(~(unsigned long)objp); | ||
| 3479 | |||
| 3480 | /* | ||
| 3481 | * We allocate for all cpus so we cannot use for online cpu here. | ||
| 3482 | */ | ||
| 3483 | for_each_possible_cpu(i) | ||
| 3484 | kfree(p->ptrs[i]); | ||
| 3485 | kfree(p); | ||
| 3486 | } | ||
| 3487 | EXPORT_SYMBOL(free_percpu); | ||
| 3488 | #endif | ||
| 3489 | |||
| 3490 | unsigned int kmem_cache_size(struct kmem_cache *cachep) | 3532 | unsigned int kmem_cache_size(struct kmem_cache *cachep) |
| 3491 | { | 3533 | { |
| 3492 | return obj_size(cachep); | 3534 | return obj_size(cachep); |
| @@ -3603,22 +3645,26 @@ static void do_ccupdate_local(void *info) | |||
| 3603 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 3645 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| 3604 | int batchcount, int shared) | 3646 | int batchcount, int shared) |
| 3605 | { | 3647 | { |
| 3606 | struct ccupdate_struct new; | 3648 | struct ccupdate_struct *new; |
| 3607 | int i, err; | 3649 | int i; |
| 3650 | |||
| 3651 | new = kzalloc(sizeof(*new), GFP_KERNEL); | ||
| 3652 | if (!new) | ||
| 3653 | return -ENOMEM; | ||
| 3608 | 3654 | ||
| 3609 | memset(&new.new, 0, sizeof(new.new)); | ||
| 3610 | for_each_online_cpu(i) { | 3655 | for_each_online_cpu(i) { |
| 3611 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, | 3656 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, |
| 3612 | batchcount); | 3657 | batchcount); |
| 3613 | if (!new.new[i]) { | 3658 | if (!new->new[i]) { |
| 3614 | for (i--; i >= 0; i--) | 3659 | for (i--; i >= 0; i--) |
| 3615 | kfree(new.new[i]); | 3660 | kfree(new->new[i]); |
| 3661 | kfree(new); | ||
| 3616 | return -ENOMEM; | 3662 | return -ENOMEM; |
| 3617 | } | 3663 | } |
| 3618 | } | 3664 | } |
| 3619 | new.cachep = cachep; | 3665 | new->cachep = cachep; |
| 3620 | 3666 | ||
| 3621 | on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); | 3667 | on_each_cpu(do_ccupdate_local, (void *)new, 1, 1); |
| 3622 | 3668 | ||
| 3623 | check_irq_on(); | 3669 | check_irq_on(); |
| 3624 | cachep->batchcount = batchcount; | 3670 | cachep->batchcount = batchcount; |
| @@ -3626,7 +3672,7 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
| 3626 | cachep->shared = shared; | 3672 | cachep->shared = shared; |
| 3627 | 3673 | ||
| 3628 | for_each_online_cpu(i) { | 3674 | for_each_online_cpu(i) { |
| 3629 | struct array_cache *ccold = new.new[i]; | 3675 | struct array_cache *ccold = new->new[i]; |
| 3630 | if (!ccold) | 3676 | if (!ccold) |
| 3631 | continue; | 3677 | continue; |
| 3632 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3678 | spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
| @@ -3634,18 +3680,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
| 3634 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); | 3680 | spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); |
| 3635 | kfree(ccold); | 3681 | kfree(ccold); |
| 3636 | } | 3682 | } |
| 3637 | 3683 | kfree(new); | |
| 3638 | err = alloc_kmemlist(cachep); | 3684 | return alloc_kmemlist(cachep); |
| 3639 | if (err) { | ||
| 3640 | printk(KERN_ERR "alloc_kmemlist failed for %s, error %d.\n", | ||
| 3641 | cachep->name, -err); | ||
| 3642 | BUG(); | ||
| 3643 | } | ||
| 3644 | return 0; | ||
| 3645 | } | 3685 | } |
| 3646 | 3686 | ||
| 3647 | /* Called with cache_chain_mutex held always */ | 3687 | /* Called with cache_chain_mutex held always */ |
| 3648 | static void enable_cpucache(struct kmem_cache *cachep) | 3688 | static int enable_cpucache(struct kmem_cache *cachep) |
| 3649 | { | 3689 | { |
| 3650 | int err; | 3690 | int err; |
| 3651 | int limit, shared; | 3691 | int limit, shared; |
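In the do_tune_cpucache() hunks above, the scratch ccupdate_struct moves from the stack to kzalloc() because it embeds an array_cache pointer for every possible CPU and can be far too large for a kernel stack; every exit path now frees it. A small sketch of moving a large scratch structure off the stack; NR_CPUS and the field names here are illustrative only:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define NR_CPUS 1024                        /* why the struct is too big for the stack */

    struct ccupdate { void *percpu_new[NR_CPUS]; };

    static int tune(void)
    {
        /* heap, not "struct ccupdate new;" on the stack (8 KiB+ here) */
        struct ccupdate *new = calloc(1, sizeof(*new));

        if (!new)
            return -1;
        /* ... swap in the new per-cpu arrays ... */
        memset(new->percpu_new, 0, sizeof(new->percpu_new));
        free(new);                              /* every exit path must free it */
        return 0;
    }

    int main(void)
    {
        return tune() ? 1 : 0;
    }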
| @@ -3697,6 +3737,7 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3697 | if (err) | 3737 | if (err) |
| 3698 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", | 3738 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", |
| 3699 | cachep->name, -err); | 3739 | cachep->name, -err); |
| 3740 | return err; | ||
| 3700 | } | 3741 | } |
| 3701 | 3742 | ||
| 3702 | /* | 3743 | /* |
| @@ -4157,6 +4198,7 @@ static int leaks_show(struct seq_file *m, void *p) | |||
| 4157 | show_symbol(m, n[2*i+2]); | 4198 | show_symbol(m, n[2*i+2]); |
| 4158 | seq_putc(m, '\n'); | 4199 | seq_putc(m, '\n'); |
| 4159 | } | 4200 | } |
| 4201 | |||
| 4160 | return 0; | 4202 | return 0; |
| 4161 | } | 4203 | } |
| 4162 | 4204 | ||
