diff options
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 174 |
1 files changed, 134 insertions, 40 deletions
@@ -115,6 +115,7 @@ | |||
115 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
116 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
117 | #include <linux/kmemcheck.h> | 117 | #include <linux/kmemcheck.h> |
118 | #include <linux/memory.h> | ||
118 | 119 | ||
119 | #include <asm/cacheflush.h> | 120 | #include <asm/cacheflush.h> |
120 | #include <asm/tlbflush.h> | 121 | #include <asm/tlbflush.h> |
@@ -1078,6 +1079,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) | |||
1078 | } | 1079 | } |
1079 | #endif | 1080 | #endif |
1080 | 1081 | ||
1082 | /* | ||
1083 | * Allocates and initializes nodelists for a node on each slab cache, used for | ||
1084 | * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 | ||
1085 | * will be allocated off-node since memory is not yet online for the new node. | ||
1086 | * When hotplugging memory or a cpu, existing nodelists are not replaced if | ||
1087 | * already in use. | ||
1088 | * | ||
1089 | * Must hold cache_chain_mutex. | ||
1090 | */ | ||
1091 | static int init_cache_nodelists_node(int node) | ||
1092 | { | ||
1093 | struct kmem_cache *cachep; | ||
1094 | struct kmem_list3 *l3; | ||
1095 | const int memsize = sizeof(struct kmem_list3); | ||
1096 | |||
1097 | list_for_each_entry(cachep, &cache_chain, next) { | ||
1098 | /* | ||
1099 | * Set up the kmem_list3 for this node before we can | ||
1100 | * begin anything. Make sure some other cpu on this | ||
1101 | * node has not already allocated it. | ||
1102 | */ | ||
1103 | if (!cachep->nodelists[node]) { | ||
1104 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
1105 | if (!l3) | ||
1106 | return -ENOMEM; | ||
1107 | kmem_list3_init(l3); | ||
1108 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
1109 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
1110 | |||
1111 | /* | ||
1112 | * The l3s don't come and go as CPUs come and | ||
1113 | * go. cache_chain_mutex is sufficient | ||
1114 | * protection here. | ||
1115 | */ | ||
1116 | cachep->nodelists[node] = l3; | ||
1117 | } | ||
1118 | |||
1119 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | ||
1120 | cachep->nodelists[node]->free_limit = | ||
1121 | (1 + nr_cpus_node(node)) * | ||
1122 | cachep->batchcount + cachep->num; | ||
1123 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
1124 | } | ||
1125 | return 0; | ||
1126 | } | ||
1127 | |||
1081 | static void __cpuinit cpuup_canceled(long cpu) | 1128 | static void __cpuinit cpuup_canceled(long cpu) |
1082 | { | 1129 | { |
1083 | struct kmem_cache *cachep; | 1130 | struct kmem_cache *cachep; |
@@ -1148,7 +1195,7 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1148 | struct kmem_cache *cachep; | 1195 | struct kmem_cache *cachep; |
1149 | struct kmem_list3 *l3 = NULL; | 1196 | struct kmem_list3 *l3 = NULL; |
1150 | int node = cpu_to_node(cpu); | 1197 | int node = cpu_to_node(cpu); |
1151 | const int memsize = sizeof(struct kmem_list3); | 1198 | int err; |
1152 | 1199 | ||
1153 | /* | 1200 | /* |
1154 | * We need to do this right in the beginning since | 1201 | * We need to do this right in the beginning since |
@@ -1156,35 +1203,9 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1156 | * kmalloc_node allows us to add the slab to the right | 1203 | * kmalloc_node allows us to add the slab to the right |
1157 | * kmem_list3 and not this cpu's kmem_list3 | 1204 | * kmem_list3 and not this cpu's kmem_list3 |
1158 | */ | 1205 | */ |
1159 | 1206 | err = init_cache_nodelists_node(node); | |
1160 | list_for_each_entry(cachep, &cache_chain, next) { | 1207 | if (err < 0) |
1161 | /* | 1208 | goto bad; |
1162 | * Set up the size64 kmemlist for cpu before we can | ||
1163 | * begin anything. Make sure some other cpu on this | ||
1164 | * node has not already allocated this | ||
1165 | */ | ||
1166 | if (!cachep->nodelists[node]) { | ||
1167 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); | ||
1168 | if (!l3) | ||
1169 | goto bad; | ||
1170 | kmem_list3_init(l3); | ||
1171 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | ||
1172 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
1173 | |||
1174 | /* | ||
1175 | * The l3s don't come and go as CPUs come and | ||
1176 | * go. cache_chain_mutex is sufficient | ||
1177 | * protection here. | ||
1178 | */ | ||
1179 | cachep->nodelists[node] = l3; | ||
1180 | } | ||
1181 | |||
1182 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | ||
1183 | cachep->nodelists[node]->free_limit = | ||
1184 | (1 + nr_cpus_node(node)) * | ||
1185 | cachep->batchcount + cachep->num; | ||
1186 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | ||
1187 | } | ||
1188 | 1209 | ||
1189 | /* | 1210 | /* |
1190 | * Now we can go ahead with allocating the shared arrays and | 1211 | * Now we can go ahead with allocating the shared arrays and |
@@ -1307,11 +1328,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { | |||
1307 | &cpuup_callback, NULL, 0 | 1328 | &cpuup_callback, NULL, 0 |
1308 | }; | 1329 | }; |
1309 | 1330 | ||
1331 | #if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG) | ||
1332 | /* | ||
1333 | * Drains freelist for a node on each slab cache, used for memory hot-remove. | ||
1334 | * Returns -EBUSY if any objects remain allocated after draining, so that the | ||
1335 | * node is not removed. | ||
1336 | * | ||
1337 | * Must hold cache_chain_mutex. | ||
1338 | */ | ||
1339 | static int __meminit drain_cache_nodelists_node(int node) | ||
1340 | { | ||
1341 | struct kmem_cache *cachep; | ||
1342 | int ret = 0; | ||
1343 | |||
1344 | list_for_each_entry(cachep, &cache_chain, next) { | ||
1345 | struct kmem_list3 *l3; | ||
1346 | |||
1347 | l3 = cachep->nodelists[node]; | ||
1348 | if (!l3) | ||
1349 | continue; | ||
1350 | |||
1351 | drain_freelist(cachep, l3, l3->free_objects); | ||
1352 | |||
1353 | if (!list_empty(&l3->slabs_full) || | ||
1354 | !list_empty(&l3->slabs_partial)) { | ||
1355 | ret = -EBUSY; | ||
1356 | break; | ||
1357 | } | ||
1358 | } | ||
1359 | return ret; | ||
1360 | } | ||
1361 | |||
1362 | static int __meminit slab_memory_callback(struct notifier_block *self, | ||
1363 | unsigned long action, void *arg) | ||
1364 | { | ||
1365 | struct memory_notify *mnb = arg; | ||
1366 | int ret = 0; | ||
1367 | int nid; | ||
1368 | |||
1369 | nid = mnb->status_change_nid; | ||
1370 | if (nid < 0) | ||
1371 | goto out; | ||
1372 | |||
1373 | switch (action) { | ||
1374 | case MEM_GOING_ONLINE: | ||
1375 | mutex_lock(&cache_chain_mutex); | ||
1376 | ret = init_cache_nodelists_node(nid); | ||
1377 | mutex_unlock(&cache_chain_mutex); | ||
1378 | break; | ||
1379 | case MEM_GOING_OFFLINE: | ||
1380 | mutex_lock(&cache_chain_mutex); | ||
1381 | ret = drain_cache_nodelists_node(nid); | ||
1382 | mutex_unlock(&cache_chain_mutex); | ||
1383 | break; | ||
1384 | case MEM_ONLINE: | ||
1385 | case MEM_OFFLINE: | ||
1386 | case MEM_CANCEL_ONLINE: | ||
1387 | case MEM_CANCEL_OFFLINE: | ||
1388 | break; | ||
1389 | } | ||
1390 | out: | ||
1391 | return ret ? notifier_from_errno(ret) : NOTIFY_OK; | ||
1392 | } | ||
1393 | #endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ | ||
1394 | |||
1310 | /* | 1395 | /* |
1311 | * swap the static kmem_list3 with kmalloced memory | 1396 | * swap the static kmem_list3 with kmalloced memory |
1312 | */ | 1397 | */ |
1313 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | 1398 | static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
1314 | int nodeid) | 1399 | int nodeid) |
1315 | { | 1400 | { |
1316 | struct kmem_list3 *ptr; | 1401 | struct kmem_list3 *ptr; |
1317 | 1402 | ||
@@ -1556,6 +1641,14 @@ void __init kmem_cache_init_late(void) | |||
1556 | */ | 1641 | */ |
1557 | register_cpu_notifier(&cpucache_notifier); | 1642 | register_cpu_notifier(&cpucache_notifier); |
1558 | 1643 | ||
1644 | #ifdef CONFIG_NUMA | ||
1645 | /* | ||
1646 | * Register a memory hotplug callback that initializes and drains | ||
1647 | * nodelists. | ||
1648 | */ | ||
1649 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | ||
1650 | #endif | ||
1651 | |||
1559 | /* | 1652 | /* |
1560 | * The reap timers are started later, with a module init call: That part | 1653 | * The reap timers are started later, with a module init call: That part |
1561 | * of the kernel is not yet operational. | 1654 | * of the kernel is not yet operational. |
@@ -2196,8 +2289,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2196 | if (ralign < align) { | 2289 | if (ralign < align) { |
2197 | ralign = align; | 2290 | ralign = align; |
2198 | } | 2291 | } |
2199 | /* disable debug if necessary */ | 2292 | /* disable debug if not aligning with REDZONE_ALIGN */ |
2200 | if (ralign > __alignof__(unsigned long long)) | 2293 | if (ralign & (__alignof__(unsigned long long) - 1)) |
2201 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2294 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
2202 | /* | 2295 | /* |
2203 | * 4) Store it. | 2296 | * 4) Store it. |
@@ -2223,8 +2316,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2223 | */ | 2316 | */ |
2224 | if (flags & SLAB_RED_ZONE) { | 2317 | if (flags & SLAB_RED_ZONE) { |
2225 | /* add space for red zone words */ | 2318 | /* add space for red zone words */ |
2226 | cachep->obj_offset += sizeof(unsigned long long); | 2319 | cachep->obj_offset += align; |
2227 | size += 2 * sizeof(unsigned long long); | 2320 | size += align + sizeof(unsigned long long); |
2228 | } | 2321 | } |
2229 | if (flags & SLAB_STORE_USER) { | 2322 | if (flags & SLAB_STORE_USER) { |
2230 | /* user store requires one word storage behind the end of | 2323 | /* user store requires one word storage behind the end of |
@@ -4192,10 +4285,11 @@ static int s_show(struct seq_file *m, void *p) | |||
4192 | unsigned long node_frees = cachep->node_frees; | 4285 | unsigned long node_frees = cachep->node_frees; |
4193 | unsigned long overflows = cachep->node_overflow; | 4286 | unsigned long overflows = cachep->node_overflow; |
4194 | 4287 | ||
4195 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 4288 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu " |
4196 | %4lu %4lu %4lu %4lu %4lu", allocs, high, grown, | 4289 | "%4lu %4lu %4lu %4lu %4lu", |
4197 | reaped, errors, max_freeable, node_allocs, | 4290 | allocs, high, grown, |
4198 | node_frees, overflows); | 4291 | reaped, errors, max_freeable, node_allocs, |
4292 | node_frees, overflows); | ||
4199 | } | 4293 | } |
4200 | /* cpu stats */ | 4294 | /* cpu stats */ |
4201 | { | 4295 | { |