-rw-r--r--   mm/slab.c   157
1 file changed, 125 insertions(+), 32 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..3230cd2c6b3b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -115,6 +115,7 @@
 #include <linux/reciprocal_div.h>
 #include <linux/debugobjects.h>
 #include <linux/kmemcheck.h>
+#include <linux/memory.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -1102,6 +1103,52 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
 }
 #endif
 
+/*
+ * Allocates and initializes nodelists for a node on each slab cache, used for
+ * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3
+ * will be allocated off-node since memory is not yet online for the new node.
+ * When hotplugging memory or a cpu, existing nodelists are not replaced if
+ * already in use.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int init_cache_nodelists_node(int node)
+{
+        struct kmem_cache *cachep;
+        struct kmem_list3 *l3;
+        const int memsize = sizeof(struct kmem_list3);
+
+        list_for_each_entry(cachep, &cache_chain, next) {
+                /*
+                 * Set up the size64 kmemlist for cpu before we can
+                 * begin anything. Make sure some other cpu on this
+                 * node has not already allocated this
+                 */
+                if (!cachep->nodelists[node]) {
+                        l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+                        if (!l3)
+                                return -ENOMEM;
+                        kmem_list3_init(l3);
+                        l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
+                            ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+
+                        /*
+                         * The l3s don't come and go as CPUs come and
+                         * go. cache_chain_mutex is sufficient
+                         * protection here.
+                         */
+                        cachep->nodelists[node] = l3;
+                }
+
+                spin_lock_irq(&cachep->nodelists[node]->list_lock);
+                cachep->nodelists[node]->free_limit =
+                        (1 + nr_cpus_node(node)) *
+                        cachep->batchcount + cachep->num;
+                spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+        }
+        return 0;
+}
+
 static void __cpuinit cpuup_canceled(long cpu)
 {
         struct kmem_cache *cachep;
@@ -1172,7 +1219,7 @@ static int __cpuinit cpuup_prepare(long cpu)
         struct kmem_cache *cachep;
         struct kmem_list3 *l3 = NULL;
         int node = cpu_to_node(cpu);
-        const int memsize = sizeof(struct kmem_list3);
+        int err;
 
         /*
          * We need to do this right in the beginning since
@@ -1180,35 +1227,9 @@ static int __cpuinit cpuup_prepare(long cpu)
          * kmalloc_node allows us to add the slab to the right
          * kmem_list3 and not this cpu's kmem_list3
          */
-
-        list_for_each_entry(cachep, &cache_chain, next) {
-                /*
-                 * Set up the size64 kmemlist for cpu before we can
-                 * begin anything. Make sure some other cpu on this
-                 * node has not already allocated this
-                 */
-                if (!cachep->nodelists[node]) {
-                        l3 = kmalloc_node(memsize, GFP_KERNEL, node);
-                        if (!l3)
-                                goto bad;
-                        kmem_list3_init(l3);
-                        l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
-                            ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
-                        /*
-                         * The l3s don't come and go as CPUs come and
-                         * go. cache_chain_mutex is sufficient
-                         * protection here.
-                         */
-                        cachep->nodelists[node] = l3;
-                }
-
-                spin_lock_irq(&cachep->nodelists[node]->list_lock);
-                cachep->nodelists[node]->free_limit =
-                        (1 + nr_cpus_node(node)) *
-                        cachep->batchcount + cachep->num;
-                spin_unlock_irq(&cachep->nodelists[node]->list_lock);
-        }
+        err = init_cache_nodelists_node(node);
+        if (err < 0)
+                goto bad;
 
         /*
          * Now we can go ahead with allocating the shared arrays and
@@ -1331,11 +1352,75 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
         &cpuup_callback, NULL, 0
 };
 
+#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
+/*
+ * Drains freelist for a node on each slab cache, used for memory hot-remove.
+ * Returns -EBUSY if all objects cannot be drained so that the node is not
+ * removed.
+ *
+ * Must hold cache_chain_mutex.
+ */
+static int __meminit drain_cache_nodelists_node(int node)
+{
+        struct kmem_cache *cachep;
+        int ret = 0;
+
+        list_for_each_entry(cachep, &cache_chain, next) {
+                struct kmem_list3 *l3;
+
+                l3 = cachep->nodelists[node];
+                if (!l3)
+                        continue;
+
+                drain_freelist(cachep, l3, l3->free_objects);
+
+                if (!list_empty(&l3->slabs_full) ||
+                    !list_empty(&l3->slabs_partial)) {
+                        ret = -EBUSY;
+                        break;
+                }
+        }
+        return ret;
+}
+
+static int __meminit slab_memory_callback(struct notifier_block *self,
+                                        unsigned long action, void *arg)
+{
+        struct memory_notify *mnb = arg;
+        int ret = 0;
+        int nid;
+
+        nid = mnb->status_change_nid;
+        if (nid < 0)
+                goto out;
+
+        switch (action) {
+        case MEM_GOING_ONLINE:
+                mutex_lock(&cache_chain_mutex);
+                ret = init_cache_nodelists_node(nid);
+                mutex_unlock(&cache_chain_mutex);
+                break;
+        case MEM_GOING_OFFLINE:
+                mutex_lock(&cache_chain_mutex);
+                ret = drain_cache_nodelists_node(nid);
+                mutex_unlock(&cache_chain_mutex);
+                break;
+        case MEM_ONLINE:
+        case MEM_OFFLINE:
+        case MEM_CANCEL_ONLINE:
+        case MEM_CANCEL_OFFLINE:
+                break;
+        }
+out:
+        return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+}
+#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
+
 /*
  * swap the static kmem_list3 with kmalloced memory
  */
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
                         int nodeid)
 {
         struct kmem_list3 *ptr;
 
@@ -1580,6 +1665,14 @@ void __init kmem_cache_init_late(void)
          */
         register_cpu_notifier(&cpucache_notifier);
 
+#ifdef CONFIG_NUMA
+        /*
+         * Register a memory hotplug callback that initializes and frees
+         * nodelists.
+         */
+        hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
+#endif
+
         /*
         * The reap timers are started later, with a module init call: That part
         * of the kernel is not yet operational.
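
For context on the pattern the hunks above rely on: a memory hotplug notifier registered with hotplug_memory_notifier() receives MEM_GOING_ONLINE / MEM_GOING_OFFLINE events whose struct memory_notify argument carries the affected node in status_change_nid, and it can veto an offline by returning an error wrapped in notifier_from_errno(). The fragment below is a minimal illustrative sketch of that pattern, not part of the patch; example_memory_callback() and its placeholder comments are hypothetical, while the notifier API names and SLAB_CALLBACK_PRI mirror the ones used in the diff.

/*
 * Illustrative sketch only: the memory hotplug notifier pattern followed by
 * slab_memory_callback() above. example_memory_callback() is a hypothetical
 * stand-in; the allocate/drain bodies are placeholders.
 */
#include <linux/memory.h>
#include <linux/notifier.h>

static int example_memory_callback(struct notifier_block *self,
                                   unsigned long action, void *arg)
{
        struct memory_notify *mnb = arg;
        int nid = mnb->status_change_nid;
        int ret = 0;

        if (nid < 0)
                return NOTIFY_OK;       /* no node-level change to handle */

        switch (action) {
        case MEM_GOING_ONLINE:
                /* allocate per-node bookkeeping for nid here */
                break;
        case MEM_GOING_OFFLINE:
                /* drain per-node bookkeeping; set ret = -EBUSY to veto */
                break;
        default:
                break;
        }
        /* A non-zero ret aborts the transition via notifier_from_errno(). */
        return ret ? notifier_from_errno(ret) : NOTIFY_OK;
}

/* Registered late in init, mirroring kmem_cache_init_late() above:
 *      hotplug_memory_notifier(example_memory_callback, SLAB_CALLBACK_PRI);
 */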