diff options
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 198 |
1 files changed, 134 insertions, 64 deletions
@@ -115,6 +115,7 @@ | |||
115 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
116 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
117 | #include <linux/kmemcheck.h> | 117 | #include <linux/kmemcheck.h> |
118 | #include <linux/memory.h> | ||
118 | 119 | ||
119 | #include <asm/cacheflush.h> | 120 | #include <asm/cacheflush.h> |
120 | #include <asm/tlbflush.h> | 121 | #include <asm/tlbflush.h> |
@@ -144,30 +145,6 @@ | |||
144 | #define BYTES_PER_WORD sizeof(void *) | 145 | #define BYTES_PER_WORD sizeof(void *) |
145 | #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) | 146 | #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) |
146 | 147 | ||
147 | #ifndef ARCH_KMALLOC_MINALIGN | ||
148 | /* | ||
149 | * Enforce a minimum alignment for the kmalloc caches. | ||
150 | * Usually, the kmalloc caches are cache_line_size() aligned, except when | ||
151 | * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. | ||
152 | * Some archs want to perform DMA into kmalloc caches and need a guaranteed | ||
153 | * alignment larger than the alignment of a 64-bit integer. | ||
154 | * ARCH_KMALLOC_MINALIGN allows that. | ||
155 | * Note that increasing this value may disable some debug features. | ||
156 | */ | ||
157 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) | ||
158 | #endif | ||
159 | |||
160 | #ifndef ARCH_SLAB_MINALIGN | ||
161 | /* | ||
162 | * Enforce a minimum alignment for all caches. | ||
163 | * Intended for archs that get misalignment faults even for BYTES_PER_WORD | ||
164 | * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. | ||
165 | * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables | ||
166 | * some debug features. | ||
167 | */ | ||
168 | #define ARCH_SLAB_MINALIGN 0 | ||
169 | #endif | ||
170 | |||
171 | #ifndef ARCH_KMALLOC_FLAGS | 148 | #ifndef ARCH_KMALLOC_FLAGS |
172 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN | 149 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN |
173 | #endif | 150 | #endif |
@@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1102 | } | 1079 | } |
1103 | #endif | 1080 | #endif |
1104 | 1081 | ||
1082 | /* | ||
1083 | * Allocates and initializes nodelists for a node on each slab cache, used for | ||
1084 | * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 | ||
1085 | * will be allocated off-node since memory is not yet online for the new node. | ||
1086 | * When hotplugging memory or a cpu, existing nodelists are not replaced if | ||
1087 | * already in use. | ||
1088 | * | ||
1089 | * Must hold cache_chain_mutex. Returns 0 on success, or -ENOMEM as soon as one kmem_list3 allocation fails; nodelists already installed for earlier caches in the chain are left in place. | ||
1090 | */ | ||
1091 | static int init_cache_nodelists_node(int node) | ||
1092 | { | ||
1093 | struct kmem_cache *cachep; | ||
1094 | struct kmem_list3 *l3; | ||
1095 | const int memsize = sizeof(struct kmem_list3); /* allocation size for one kmem_list3 */ | ||
1096 | |||
1097 | list_for_each_entry(cachep, &cache_chain, next) { /* visit every cache on cache_chain */ | ||
1098 | /* | ||
1099 | * Set up the size64 kmemlist for cpu before we can | ||
1100 | * begin anything. Make sure some other cpu on this | ||
1101 | * node has not already allocated this. | ||
1102 | */ | ||
1103 | if (!cachep->nodelists[node]) { | ||
1104 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
1105 | if (!l3) | ||
1106 | return -ENOMEM; /* bail out immediately; nothing set up on earlier iterations is undone */ | ||
1107 | kmem_list3_init(l3); | ||
1108 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
1109 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; /* extra offset is derived from the cache's own address */ | ||
1110 | |||
1111 | /* | ||
1112 | * The l3s don't come and go as CPUs come and | ||
1113 | * go. cache_chain_mutex is sufficient | ||
1114 | * protection here. | ||
1115 | */ | ||
1116 | cachep->nodelists[node] = l3; | ||
1117 | } | ||
1118 | |||
1119 | spin_lock_irq(&cachep->nodelists[node]->list_lock); /* free_limit is recomputed under list_lock, for pre-existing nodelists too */ | ||
1120 | cachep->nodelists[node]->free_limit = | ||
1121 | (1 + nr_cpus_node(node)) * | ||
1122 | cachep->batchcount + cachep->num; | ||
1123 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
1124 | } | ||
1125 | return 0; | ||
1126 | } | ||
1127 | |||
1105 | static void __cpuinit cpuup_canceled(long cpu) | 1128 | static void __cpuinit cpuup_canceled(long cpu) |
1106 | { | 1129 | { |
1107 | struct kmem_cache *cachep; | 1130 | struct kmem_cache *cachep; |
@@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1172 | struct kmem_cache *cachep; | 1195 | struct kmem_cache *cachep; |
1173 | struct kmem_list3 *l3 = NULL; | 1196 | struct kmem_list3 *l3 = NULL; |
1174 | int node = cpu_to_node(cpu); | 1197 | int node = cpu_to_node(cpu); |
1175 | const int memsize = sizeof(struct kmem_list3); | 1198 | int err; |
1176 | 1199 | ||
1177 | /* | 1200 | /* |
1178 | * We need to do this right in the beginning since | 1201 | * We need to do this right in the beginning since |
@@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1180 | * kmalloc_node allows us to add the slab to the right | 1203 | * kmalloc_node allows us to add the slab to the right |
1181 | * kmem_list3 and not this cpu's kmem_list3 | 1204 | * kmem_list3 and not this cpu's kmem_list3 |
1182 | */ | 1205 | */ |
1183 | 1206 | err = init_cache_nodelists_node(node); | |
1184 | list_for_each_entry(cachep, &cache_chain, next) { | 1207 | if (err < 0) |
1185 | /* | 1208 | goto bad; |
1186 | * Set up the size64 kmemlist for cpu before we can | ||
1187 | * begin anything. Make sure some other cpu on this | ||
1188 | * node has not already allocated this | ||
1189 | */ | ||
1190 | if (!cachep->nodelists[node]) { | ||
1191 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
1192 | if (!l3) | ||
1193 | goto bad; | ||
1194 | kmem_list3_init(l3); | ||
1195 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
1196 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
1197 | |||
1198 | /* | ||
1199 | * The l3s don't come and go as CPUs come and | ||
1200 | * go. cache_chain_mutex is sufficient | ||
1201 | * protection here. | ||
1202 | */ | ||
1203 | cachep->nodelists[node] = l3; | ||
1204 | } | ||
1205 | |||
1206 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | ||
1207 | cachep->nodelists[node]->free_limit = | ||
1208 | (1 + nr_cpus_node(node)) * | ||
1209 | cachep->batchcount + cachep->num; | ||
1210 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
1211 | } | ||
1212 | 1209 | ||
1213 | /* | 1210 | /* |
1214 | * Now we can go ahead with allocating the shared arrays and | 1211 | * Now we can go ahead with allocating the shared arrays and |
@@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { | |||
1331 | &cpuup_callback, NULL, 0 | 1328 | &cpuup_callback, NULL, 0 |
1332 | }; | 1329 | }; |
1333 | 1330 | ||
1331 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | ||
1332 | /* | ||
1333 | * Drains freelist for a node on each slab cache, used for memory hot-remove. | ||
1334 | * Returns -EBUSY if all objects cannot be drained so that the node is not | ||
1335 | * removed. Returns 0 when, after draining, no cache has slabs left on the node's full or partial lists. | ||
1336 | * | ||
1337 | * Must hold cache_chain_mutex. | ||
1338 | */ | ||
1339 | static int __meminit drain_cache_nodelists_node(int node) | ||
1340 | { | ||
1341 | struct kmem_cache *cachep; | ||
1342 | int ret = 0; | ||
1343 | |||
1344 | list_for_each_entry(cachep, &cache_chain, next) { /* visit every cache on cache_chain */ | ||
1345 | struct kmem_list3 *l3; | ||
1346 | |||
1347 | l3 = cachep->nodelists[node]; | ||
1348 | if (!l3) | ||
1349 | continue; /* this cache has no nodelist for the node */ | ||
1350 | |||
1351 | drain_freelist(cachep, l3, l3->free_objects); /* the current free-object count is passed as the amount to drain */ | ||
1352 | |||
1353 | if (!list_empty(&l3->slabs_full) || | ||
1354 | !list_empty(&l3->slabs_partial)) { | ||
1355 | ret = -EBUSY; /* slabs remain on the full or partial list */ | ||
1356 | break; /* stop at the first busy cache; later caches are not drained */ | ||
1357 | } | ||
1358 | } | ||
1359 | return ret; | ||
1360 | } | ||
1361 | /* Memory hotplug notifier callback. When the notification carries a node id (status_change_nid >= 0), it sets up that node's nodelists on MEM_GOING_ONLINE and drains them on MEM_GOING_OFFLINE, taking cache_chain_mutex around each helper; every other action is a no-op. Returns NOTIFY_OK, or notifier_from_errno() of the helper's error code. */ |||
1362 | static int __meminit slab_memory_callback(struct notifier_block *self, | ||
1363 | unsigned long action, void *arg) | ||
1364 | { | ||
1365 | struct memory_notify *mnb = arg; | ||
1366 | int ret = 0; | ||
1367 | int nid; | ||
1368 | |||
1369 | nid = mnb->status_change_nid; | ||
1370 | if (nid < 0) /* negative node id: skip all work and report NOTIFY_OK */ | ||
1371 | goto out; | ||
1372 | |||
1373 | switch (action) { | ||
1374 | case MEM_GOING_ONLINE: | ||
1375 | mutex_lock(&cache_chain_mutex); /* init_cache_nodelists_node() must run with cache_chain_mutex held */ | ||
1376 | ret = init_cache_nodelists_node(nid); | ||
1377 | mutex_unlock(&cache_chain_mutex); | ||
1378 | break; | ||
1379 | case MEM_GOING_OFFLINE: | ||
1380 | mutex_lock(&cache_chain_mutex); /* drain_cache_nodelists_node() must run with cache_chain_mutex held */ | ||
1381 | ret = drain_cache_nodelists_node(nid); | ||
1382 | mutex_unlock(&cache_chain_mutex); | ||
1383 | break; | ||
1384 | case MEM_ONLINE: | ||
1385 | case MEM_OFFLINE: | ||
1386 | case MEM_CANCEL_ONLINE: | ||
1387 | case MEM_CANCEL_OFFLINE: | ||
1388 | break; /* no slab work is done for these actions */ | ||
1389 | } | ||
1390 | out: | ||
1391 | return ret ? notifier_from_errno(ret) : NOTIFY_OK; /* a non-zero errno from a helper is converted to a notifier return code */ | ||
1392 | } | ||
1393 | #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ | ||
1394 | |||
1334 | /* | 1395 | /* |
1335 | * swap the static kmem_list3 with kmalloced memory | 1396 | * swap the static kmem_list3 with kmalloced memory |
1336 | */ | 1397 | */ |
1337 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | 1398 | static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
1338 | int nodeid) | 1399 | int nodeid) |
1339 | { | 1400 | { |
1340 | struct kmem_list3 *ptr; | 1401 | struct kmem_list3 *ptr; |
1341 | 1402 | ||
@@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void) | |||
1580 | */ | 1641 | */ |
1581 | register_cpu_notifier(&cpucache_notifier); | 1642 | register_cpu_notifier(&cpucache_notifier); |
1582 | 1643 | ||
1644 | #ifdef CONFIG_NUMA | ||
1645 | /* | ||
1646 | * Register a memory hotplug callback that initializes and frees | ||
1647 | * nodelists. | ||
1648 | */ | ||
1649 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | ||
1650 | #endif | ||
1651 | |||
1583 | /* | 1652 | /* |
1584 | * The reap timers are started later, with a module init call: That part | 1653 | * The reap timers are started later, with a module init call: That part |
1585 | * of the kernel is not yet operational. | 1654 | * of the kernel is not yet operational. |
@@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2220 | if (ralign < align) { | 2289 | if (ralign < align) { |
2221 | ralign = align; | 2290 | ralign = align; |
2222 | } | 2291 | } |
2223 | /* disable debug if necessary */ | 2292 | /* disable debug if not aligning with REDZONE_ALIGN */ |
2224 | if (ralign > __alignof__(unsigned long long)) | 2293 | if (ralign & (__alignof__(unsigned long long) - 1)) |
2225 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2294 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
2226 | /* | 2295 | /* |
2227 | * 4) Store it. | 2296 | * 4) Store it. |
@@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2247 | */ | 2316 | */ |
2248 | if (flags & SLAB_RED_ZONE) { | 2317 | if (flags & SLAB_RED_ZONE) { |
2249 | /* add space for red zone words */ | 2318 | /* add space for red zone words */ |
2250 | cachep->obj_offset += sizeof(unsigned long long); | 2319 | cachep->obj_offset += align; |
2251 | size += 2 * sizeof(unsigned long long); | 2320 | size += align + sizeof(unsigned long long); |
2252 | } | 2321 | } |
2253 | if (flags & SLAB_STORE_USER) { | 2322 | if (flags & SLAB_STORE_USER) { |
2254 | /* user store requires one word storage behind the end of | 2323 | /* user store requires one word storage behind the end of |
@@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p) | |||
4216 | unsigned long node_frees = cachep->node_frees; | 4285 | unsigned long node_frees = cachep->node_frees; |
4217 | unsigned long overflows = cachep->node_overflow; | 4286 | unsigned long overflows = cachep->node_overflow; |
4218 | 4287 | ||
4219 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 4288 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu " |
4220 | %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, | 4289 | "%4lu %4lu %4lu %4lu %4lu", |
4221 | reaped, errors, max_freeable, node_allocs, | 4290 | allocs, high, grown, |
4222 | node_frees, overflows); | 4291 | reaped, errors, max_freeable, node_allocs, |
4292 | node_frees, overflows); | ||
4223 | } | 4293 | } |
4224 | /* cpu stats */ | 4294 | /* cpu stats */ |
4225 | { | 4295 | { |