diff options
Diffstat (limited to 'mm/slab.c')
| -rw-r--r-- | mm/slab.c | 198 |
1 files changed, 134 insertions, 64 deletions
| @@ -115,6 +115,7 @@ | |||
| 115 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
| 116 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
| 117 | #include <linux/kmemcheck.h> | 117 | #include <linux/kmemcheck.h> |
| 118 | #include <linux/memory.h> | ||
| 118 | 119 | ||
| 119 | #include <asm/cacheflush.h> | 120 | #include <asm/cacheflush.h> |
| 120 | #include <asm/tlbflush.h> | 121 | #include <asm/tlbflush.h> |
| @@ -144,30 +145,6 @@ | |||
| 144 | #define BYTES_PER_WORD sizeof(void *) | 145 | #define BYTES_PER_WORD sizeof(void *) |
| 145 | #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) | 146 | #define REDZONE_ALIGN max(BYTES_PER_WORD, __alignof__(unsigned long long)) |
| 146 | 147 | ||
| 147 | #ifndef ARCH_KMALLOC_MINALIGN | ||
| 148 | /* | ||
| 149 | * Enforce a minimum alignment for the kmalloc caches. | ||
| 150 | * Usually, the kmalloc caches are cache_line_size() aligned, except when | ||
| 151 | * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned. | ||
| 152 | * Some archs want to perform DMA into kmalloc caches and need a guaranteed | ||
| 153 | * alignment larger than the alignment of a 64-bit integer. | ||
| 154 | * ARCH_KMALLOC_MINALIGN allows that. | ||
| 155 | * Note that increasing this value may disable some debug features. | ||
| 156 | */ | ||
| 157 | #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) | ||
| 158 | #endif | ||
| 159 | |||
| 160 | #ifndef ARCH_SLAB_MINALIGN | ||
| 161 | /* | ||
| 162 | * Enforce a minimum alignment for all caches. | ||
| 163 | * Intended for archs that get misalignment faults even for BYTES_PER_WORD | ||
| 164 | * aligned buffers. Includes ARCH_KMALLOC_MINALIGN. | ||
| 165 | * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables | ||
| 166 | * some debug features. | ||
| 167 | */ | ||
| 168 | #define ARCH_SLAB_MINALIGN 0 | ||
| 169 | #endif | ||
| 170 | |||
| 171 | #ifndef ARCH_KMALLOC_FLAGS | 148 | #ifndef ARCH_KMALLOC_FLAGS |
| 172 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN | 149 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN |
| 173 | #endif | 150 | #endif |
| @@ -1102,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
| 1102 | } | 1079 | } |
| 1103 | #endif | 1080 | #endif |
| 1104 | 1081 | ||
| 1082 | /* | ||
| 1083 | * Allocates and initializes nodelists for a node on each slab cache, used for | ||
| 1084 | * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 | ||
| 1085 | * will be allocated off-node since memory is not yet online for the new node. | ||
| 1086 | * When hotplugging memory or a cpu, existing nodelists are not replaced if | ||
| 1087 | * already in use. | ||
| 1088 | * | ||
| 1089 | * Must hold cache_chain_mutex. | ||
| 1090 | */ | ||
| 1091 | static int init_cache_nodelists_node(int node) | ||
| 1092 | { | ||
| 1093 | struct kmem_cache *cachep; | ||
| 1094 | struct kmem_list3 *l3; | ||
| 1095 | const int memsize = sizeof(struct kmem_list3); | ||
| 1096 | |||
| 1097 | list_for_each_entry(cachep, &cache_chain, next) { | ||
| 1098 | /* | ||
| 1099 | * Set up the size64 kmemlist for cpu before we can | ||
| 1100 | * begin anything. Make sure some other cpu on this | ||
| 1101 | * node has not already allocated this | ||
| 1102 | */ | ||
| 1103 | if (!cachep->nodelists[node]) { | ||
| 1104 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
| 1105 | if (!l3) | ||
| 1106 | return -ENOMEM; | ||
| 1107 | kmem_list3_init(l3); | ||
| 1108 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
| 1109 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 1110 | |||
| 1111 | /* | ||
| 1112 | * The l3s don't come and go as CPUs come and | ||
| 1113 | * go. cache_chain_mutex is sufficient | ||
| 1114 | * protection here. | ||
| 1115 | */ | ||
| 1116 | cachep->nodelists[node] = l3; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | ||
| 1120 | cachep->nodelists[node]->free_limit = | ||
| 1121 | (1 + nr_cpus_node(node)) * | ||
| 1122 | cachep->batchcount + cachep->num; | ||
| 1123 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
| 1124 | } | ||
| 1125 | return 0; | ||
| 1126 | } | ||
| 1127 | |||
| 1105 | static void __cpuinit cpuup_canceled(long cpu) | 1128 | static void __cpuinit cpuup_canceled(long cpu) |
| 1106 | { | 1129 | { |
| 1107 | struct kmem_cache *cachep; | 1130 | struct kmem_cache *cachep; |
| @@ -1172,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
| 1172 | struct kmem_cache *cachep; | 1195 | struct kmem_cache *cachep; |
| 1173 | struct kmem_list3 *l3 = NULL; | 1196 | struct kmem_list3 *l3 = NULL; |
| 1174 | int node = cpu_to_node(cpu); | 1197 | int node = cpu_to_node(cpu); |
| 1175 | const int memsize = sizeof(struct kmem_list3); | 1198 | int err; |
| 1176 | 1199 | ||
| 1177 | /* | 1200 | /* |
| 1178 | * We need to do this right in the beginning since | 1201 | * We need to do this right in the beginning since |
| @@ -1180,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
| 1180 | * kmalloc_node allows us to add the slab to the right | 1203 | * kmalloc_node allows us to add the slab to the right |
| 1181 | * kmem_list3 and not this cpu's kmem_list3 | 1204 | * kmem_list3 and not this cpu's kmem_list3 |
| 1182 | */ | 1205 | */ |
| 1183 | 1206 | err = init_cache_nodelists_node(node); | |
| 1184 | list_for_each_entry(cachep, &cache_chain, next) { | 1207 | if (err < 0) |
| 1185 | /* | 1208 | goto bad; |
| 1186 | * Set up the size64 kmemlist for cpu before we can | ||
| 1187 | * begin anything. Make sure some other cpu on this | ||
| 1188 | * node has not already allocated this | ||
| 1189 | */ | ||
| 1190 | if (!cachep->nodelists[node]) { | ||
| 1191 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
| 1192 | if (!l3) | ||
| 1193 | goto bad; | ||
| 1194 | kmem_list3_init(l3); | ||
| 1195 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
| 1196 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 1197 | |||
| 1198 | /* | ||
| 1199 | * The l3s don't come and go as CPUs come and | ||
| 1200 | * go. cache_chain_mutex is sufficient | ||
| 1201 | * protection here. | ||
| 1202 | */ | ||
| 1203 | cachep->nodelists[node] = l3; | ||
| 1204 | } | ||
| 1205 | |||
| 1206 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | ||
| 1207 | cachep->nodelists[node]->free_limit = | ||
| 1208 | (1 + nr_cpus_node(node)) * | ||
| 1209 | cachep->batchcount + cachep->num; | ||
| 1210 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
| 1211 | } | ||
| 1212 | 1209 | ||
| 1213 | /* | 1210 | /* |
| 1214 | * Now we can go ahead with allocating the shared arrays and | 1211 | * Now we can go ahead with allocating the shared arrays and |
| @@ -1331,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { | |||
| 1331 | &cpuup_callback, NULL, 0 | 1328 | &cpuup_callback, NULL, 0 |
| 1332 | }; | 1329 | }; |
| 1333 | 1330 | ||
| 1331 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | ||
| 1332 | /* | ||
| 1333 | * Drains freelist for a node on each slab cache, used for memory hot-remove. | ||
| 1334 | * Returns -EBUSY if not all objects can be drained, so that the node is not | ||
| 1335 | * removed. | ||
| 1336 | * | ||
| 1337 | * Must hold cache_chain_mutex. | ||
| 1338 | */ | ||
| 1339 | static int __meminit drain_cache_nodelists_node(int node) | ||
| 1340 | { | ||
| 1341 | struct kmem_cache *cachep; | ||
| 1342 | int ret = 0; | ||
| 1343 | |||
| 1344 | list_for_each_entry(cachep, &cache_chain, next) { | ||
| 1345 | struct kmem_list3 *l3; | ||
| 1346 | |||
| 1347 | l3 = cachep->nodelists[node]; | ||
| 1348 | if (!l3) | ||
| 1349 | continue; | ||
| 1350 | |||
| 1351 | drain_freelist(cachep, l3, l3->free_objects); | ||
| 1352 | |||
| 1353 | if (!list_empty(&l3->slabs_full) || | ||
| 1354 | !list_empty(&l3->slabs_partial)) { | ||
| 1355 | ret = -EBUSY; | ||
| 1356 | break; | ||
| 1357 | } | ||
| 1358 | } | ||
| 1359 | return ret; | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | static int __meminit slab_memory_callback(struct notifier_block *self, | ||
| 1363 | unsigned long action, void *arg) | ||
| 1364 | { | ||
| 1365 | struct memory_notify *mnb = arg; | ||
| 1366 | int ret = 0; | ||
| 1367 | int nid; | ||
| 1368 | |||
| 1369 | nid = mnb->status_change_nid; | ||
| 1370 | if (nid < 0) | ||
| 1371 | goto out; | ||
| 1372 | |||
| 1373 | switch (action) { | ||
| 1374 | case MEM_GOING_ONLINE: | ||
| 1375 | mutex_lock(&cache_chain_mutex); | ||
| 1376 | ret = init_cache_nodelists_node(nid); | ||
| 1377 | mutex_unlock(&cache_chain_mutex); | ||
| 1378 | break; | ||
| 1379 | case MEM_GOING_OFFLINE: | ||
| 1380 | mutex_lock(&cache_chain_mutex); | ||
| 1381 | ret = drain_cache_nodelists_node(nid); | ||
| 1382 | mutex_unlock(&cache_chain_mutex); | ||
| 1383 | break; | ||
| 1384 | case MEM_ONLINE: | ||
| 1385 | case MEM_OFFLINE: | ||
| 1386 | case MEM_CANCEL_ONLINE: | ||
| 1387 | case MEM_CANCEL_OFFLINE: | ||
| 1388 | break; | ||
| 1389 | } | ||
| 1390 | out: | ||
| 1391 | return ret ? notifier_from_errno(ret) : NOTIFY_OK; | ||
| 1392 | } | ||
| 1393 | #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ | ||
| 1394 | |||
| 1334 | /* | 1395 | /* |
| 1335 | * swap the static kmem_list3 with kmalloced memory | 1396 | * swap the static kmem_list3 with kmalloced memory |
| 1336 | */ | 1397 | */ |
| 1337 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | 1398 | static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
| 1338 | int nodeid) | 1399 | int nodeid) |
| 1339 | { | 1400 | { |
| 1340 | struct kmem_list3 *ptr; | 1401 | struct kmem_list3 *ptr; |
| 1341 | 1402 | ||
| @@ -1580,6 +1641,14 @@ void __init kmem_cache_init_late(void) | |||
| 1580 | */ | 1641 | */ |
| 1581 | register_cpu_notifier(&cpucache_notifier); | 1642 | register_cpu_notifier(&cpucache_notifier); |
| 1582 | 1643 | ||
| 1644 | #ifdef CONFIG_NUMA | ||
| 1645 | /* | ||
| 1646 | * Register a memory hotplug callback that initializes nodelists when a | ||
| 1647 | * node's memory is going online and drains them when it is going offline. | ||
| 1648 | */ | ||
| 1649 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | ||
| 1650 | #endif | ||
| 1651 | |||
| 1583 | /* | 1652 | /* |
| 1584 | * The reap timers are started later, with a module init call: That part | 1653 | * The reap timers are started later, with a module init call: That part |
| 1585 | * of the kernel is not yet operational. | 1654 | * of the kernel is not yet operational. |
| @@ -2220,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2220 | if (ralign < align) { | 2289 | if (ralign < align) { |
| 2221 | ralign = align; | 2290 | ralign = align; |
| 2222 | } | 2291 | } |
| 2223 | /* disable debug if necessary */ | 2292 | /* disable debug if not aligning with REDZONE_ALIGN */ |
| 2224 | if (ralign > __alignof__(unsigned long long)) | 2293 | if (ralign & (__alignof__(unsigned long long) - 1)) |
| 2225 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2294 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
| 2226 | /* | 2295 | /* |
| 2227 | * 4) Store it. | 2296 | * 4) Store it. |
| @@ -2247,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 2247 | */ | 2316 | */ |
| 2248 | if (flags & SLAB_RED_ZONE) { | 2317 | if (flags & SLAB_RED_ZONE) { |
| 2249 | /* add space for red zone words */ | 2318 | /* add space for red zone words */ |
| 2250 | cachep->obj_offset += sizeof(unsigned long long); | 2319 | cachep->obj_offset += align; |
| 2251 | size += 2 * sizeof(unsigned long long); | 2320 | size += align + sizeof(unsigned long long); |
| 2252 | } | 2321 | } |
| 2253 | if (flags & SLAB_STORE_USER) { | 2322 | if (flags & SLAB_STORE_USER) { |
| 2254 | /* user store requires one word storage behind the end of | 2323 | /* user store requires one word storage behind the end of |
| @@ -4216,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p) | |||
| 4216 | unsigned long node_frees = cachep->node_frees; | 4285 | unsigned long node_frees = cachep->node_frees; |
| 4217 | unsigned long overflows = cachep->node_overflow; | 4286 | unsigned long overflows = cachep->node_overflow; |
| 4218 | 4287 | ||
| 4219 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 4288 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu " |
| 4220 | %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, | 4289 | "%4lu %4lu %4lu %4lu %4lu", |
| 4221 | reaped, errors, max_freeable, node_allocs, | 4290 | allocs, high, grown, |
| 4222 | node_frees, overflows); | 4291 | reaped, errors, max_freeable, node_allocs, |
| 4292 | node_frees, overflows); | ||
| 4223 | } | 4293 | } |
| 4224 | /* cpu stats */ | 4294 | /* cpu stats */ |
| 4225 | { | 4295 | { |
