aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavikiran G Thirumalai <kiran@scalex86.org>2006-12-06 23:32:14 -0500
committerLinus Torvalds <torvalds@woody.osdl.org>2006-12-07 11:39:21 -0500
commit8f5be20bf87da7c7c59c5cc84f630a1eca5cc99c (patch)
tree0fc18e33fa1b43543837e99a0f09c77f686f327b
parenta44b56d354b49f9abb184e5a14f71889856283bb (diff)
[PATCH] mm: slab: eliminate lock_cpu_hotplug from slab
Here's an attempt towards doing away with lock_cpu_hotplug in the slab subsystem. This approach also fixes a bug which shows up when cpus are being offlined/onlined and slab caches are being tuned simultaneously. http://marc.theaimsgroup.com/?l=linux-kernel&m=116098888100481&w=2 The patch has been stress tested overnight on a 2 socket 4 core AMD box with repeated cpu online and offline, while dbench and kernbench processes were running, and slab caches were being tuned at the same time. There were no lockdep warnings either. (This test was run on 2.6.18, as 2.6.19-rc crashes at __drain_pages: http://marc.theaimsgroup.com/?l=linux-kernel&m=116172164217678&w=2 ) The approach here is to hold cache_chain_mutex from CPU_UP_PREPARE until CPU_ONLINE (similar in approach to workqueue_mutex). Slab code sensitive to cpu_online_map (kmem_cache_create, kmem_cache_destroy, slabinfo_write, __cache_shrink) is already serialized with cache_chain_mutex. (This patch lengthens cache_chain_mutex hold time at kmem_cache_destroy to cover this). This patch also takes the cache_chain_mutex at kmem_cache_shrink to protect sanity of cpu_online_map at __cache_shrink, as viewed by slab. (kmem_cache_shrink->__cache_shrink->drain_cpu_caches). But, really, kmem_cache_shrink is used at just one place in the acpi subsystem! Do we really need to keep kmem_cache_shrink at all? Another note. Looks like a cpu hotplug event can send CPU_UP_CANCELED to a registered subsystem even if the subsystem did not receive CPU_UP_PREPARE. This could be due to a subsystem registered for notification earlier than the current subsystem crapping out with NOTIFY_BAD. Badness can occur within the CPU_UP_CANCELED code path at slab if this happens (The same would apply for workqueue.c as well). 
To overcome this, we might have to either a) use a per-subsystem flag and avoid handling of CPU_UP_CANCELED, or b) use special notifier events like LOCK_ACQUIRE/RELEASE as Gautham was using in his experiments, or c) not send CPU_UP_CANCELED to a subsystem which did not receive CPU_UP_PREPARE. I would prefer c). Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org> Signed-off-by: Shai Fultheim <shai@scalex86.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--mm/slab.c40
1 files changed, 21 insertions, 19 deletions
diff --git a/mm/slab.c b/mm/slab.c
index ff60a94142f9..3318252f657f 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -730,7 +730,10 @@ static inline void init_lock_keys(void)
730} 730}
731#endif 731#endif
732 732
733/* Guard access to the cache-chain. */ 733/*
734 * 1. Guard access to the cache-chain.
735 * 2. Protect sanity of cpu_online_map against cpu hotplug events
736 */
734static DEFINE_MUTEX(cache_chain_mutex); 737static DEFINE_MUTEX(cache_chain_mutex);
735static struct list_head cache_chain; 738static struct list_head cache_chain;
736 739
@@ -1230,12 +1233,18 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1230 kfree(shared); 1233 kfree(shared);
1231 free_alien_cache(alien); 1234 free_alien_cache(alien);
1232 } 1235 }
1233 mutex_unlock(&cache_chain_mutex);
1234 break; 1236 break;
1235 case CPU_ONLINE: 1237 case CPU_ONLINE:
1238 mutex_unlock(&cache_chain_mutex);
1236 start_cpu_timer(cpu); 1239 start_cpu_timer(cpu);
1237 break; 1240 break;
1238#ifdef CONFIG_HOTPLUG_CPU 1241#ifdef CONFIG_HOTPLUG_CPU
1242 case CPU_DOWN_PREPARE:
1243 mutex_lock(&cache_chain_mutex);
1244 break;
1245 case CPU_DOWN_FAILED:
1246 mutex_unlock(&cache_chain_mutex);
1247 break;
1239 case CPU_DEAD: 1248 case CPU_DEAD:
1240 /* 1249 /*
1241 * Even if all the cpus of a node are down, we don't free the 1250 * Even if all the cpus of a node are down, we don't free the
@@ -1246,8 +1255,8 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1246 * gets destroyed at kmem_cache_destroy(). 1255 * gets destroyed at kmem_cache_destroy().
1247 */ 1256 */
1248 /* fall thru */ 1257 /* fall thru */
1258#endif
1249 case CPU_UP_CANCELED: 1259 case CPU_UP_CANCELED:
1250 mutex_lock(&cache_chain_mutex);
1251 list_for_each_entry(cachep, &cache_chain, next) { 1260 list_for_each_entry(cachep, &cache_chain, next) {
1252 struct array_cache *nc; 1261 struct array_cache *nc;
1253 struct array_cache *shared; 1262 struct array_cache *shared;
@@ -1308,11 +1317,9 @@ free_array_cache:
1308 } 1317 }
1309 mutex_unlock(&cache_chain_mutex); 1318 mutex_unlock(&cache_chain_mutex);
1310 break; 1319 break;
1311#endif
1312 } 1320 }
1313 return NOTIFY_OK; 1321 return NOTIFY_OK;
1314bad: 1322bad:
1315 mutex_unlock(&cache_chain_mutex);
1316 return NOTIFY_BAD; 1323 return NOTIFY_BAD;
1317} 1324}
1318 1325
@@ -2098,11 +2105,9 @@ kmem_cache_create (const char *name, size_t size, size_t align,
2098 } 2105 }
2099 2106
2100 /* 2107 /*
2101 * Prevent CPUs from coming and going. 2108 * We use cache_chain_mutex to ensure a consistent view of
2102 * lock_cpu_hotplug() nests outside cache_chain_mutex 2109 * cpu_online_map as well. Please see cpuup_callback
2103 */ 2110 */
2104 lock_cpu_hotplug();
2105
2106 mutex_lock(&cache_chain_mutex); 2111 mutex_lock(&cache_chain_mutex);
2107 2112
2108 list_for_each_entry(pc, &cache_chain, next) { 2113 list_for_each_entry(pc, &cache_chain, next) {
@@ -2325,7 +2330,6 @@ oops:
2325 panic("kmem_cache_create(): failed to create slab `%s'\n", 2330 panic("kmem_cache_create(): failed to create slab `%s'\n",
2326 name); 2331 name);
2327 mutex_unlock(&cache_chain_mutex); 2332 mutex_unlock(&cache_chain_mutex);
2328 unlock_cpu_hotplug();
2329 return cachep; 2333 return cachep;
2330} 2334}
2331EXPORT_SYMBOL(kmem_cache_create); 2335EXPORT_SYMBOL(kmem_cache_create);
@@ -2443,6 +2447,7 @@ out:
2443 return nr_freed; 2447 return nr_freed;
2444} 2448}
2445 2449
2450/* Called with cache_chain_mutex held to protect against cpu hotplug */
2446static int __cache_shrink(struct kmem_cache *cachep) 2451static int __cache_shrink(struct kmem_cache *cachep)
2447{ 2452{
2448 int ret = 0, i = 0; 2453 int ret = 0, i = 0;
@@ -2473,9 +2478,13 @@ static int __cache_shrink(struct kmem_cache *cachep)
2473 */ 2478 */
2474int kmem_cache_shrink(struct kmem_cache *cachep) 2479int kmem_cache_shrink(struct kmem_cache *cachep)
2475{ 2480{
2481 int ret;
2476 BUG_ON(!cachep || in_interrupt()); 2482 BUG_ON(!cachep || in_interrupt());
2477 2483
2478 return __cache_shrink(cachep); 2484 mutex_lock(&cache_chain_mutex);
2485 ret = __cache_shrink(cachep);
2486 mutex_unlock(&cache_chain_mutex);
2487 return ret;
2479} 2488}
2480EXPORT_SYMBOL(kmem_cache_shrink); 2489EXPORT_SYMBOL(kmem_cache_shrink);
2481 2490
@@ -2499,23 +2508,16 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2499{ 2508{
2500 BUG_ON(!cachep || in_interrupt()); 2509 BUG_ON(!cachep || in_interrupt());
2501 2510
2502 /* Don't let CPUs to come and go */
2503 lock_cpu_hotplug();
2504
2505 /* Find the cache in the chain of caches. */ 2511 /* Find the cache in the chain of caches. */
2506 mutex_lock(&cache_chain_mutex); 2512 mutex_lock(&cache_chain_mutex);
2507 /* 2513 /*
2508 * the chain is never empty, cache_cache is never destroyed 2514 * the chain is never empty, cache_cache is never destroyed
2509 */ 2515 */
2510 list_del(&cachep->next); 2516 list_del(&cachep->next);
2511 mutex_unlock(&cache_chain_mutex);
2512
2513 if (__cache_shrink(cachep)) { 2517 if (__cache_shrink(cachep)) {
2514 slab_error(cachep, "Can't free all objects"); 2518 slab_error(cachep, "Can't free all objects");
2515 mutex_lock(&cache_chain_mutex);
2516 list_add(&cachep->next, &cache_chain); 2519 list_add(&cachep->next, &cache_chain);
2517 mutex_unlock(&cache_chain_mutex); 2520 mutex_unlock(&cache_chain_mutex);
2518 unlock_cpu_hotplug();
2519 return; 2521 return;
2520 } 2522 }
2521 2523
@@ -2523,7 +2525,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep)
2523 synchronize_rcu(); 2525 synchronize_rcu();
2524 2526
2525 __kmem_cache_destroy(cachep); 2527 __kmem_cache_destroy(cachep);
2526 unlock_cpu_hotplug(); 2528 mutex_unlock(&cache_chain_mutex);
2527} 2529}
2528EXPORT_SYMBOL(kmem_cache_destroy); 2530EXPORT_SYMBOL(kmem_cache_destroy);
2529 2531