Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	169
1 file changed, 108 insertions(+), 61 deletions(-)
@@ -294,6 +294,7 @@ struct kmem_list3 {
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	INIT_LIST_HEAD(&parent->slabs_free);
 	parent->shared = NULL;
 	parent->alien = NULL;
+	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	unsigned int colour_next;	/* cache colouring */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
-static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
 	struct array_cache *ac;
 	unsigned long flags;
 
 	for_each_online_node(i) {
-		ac = l3->alien[i];
+		ac = alien[i];
 		if (ac) {
 			spin_lock_irqsave(&ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
@@ -900,8 +901,11 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
 }
 #else
 #define alloc_alien_cache(node, limit) do { } while (0)
-#define free_alien_cache(ac_ptr) do { } while (0)
-#define drain_alien_cache(cachep, l3) do { } while (0)
+#define drain_alien_cache(cachep, alien) do { } while (0)
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
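On !CONFIG_NUMA builds, free_alien_cache() becomes an empty inline function instead of a statement macro, so its pointer argument is still evaluated and type-checked. A minimal sketch of why that lets later cleanup paths be written identically for both configurations (the helper name below is hypothetical, not part of the patch):

/*
 * Sketch only: kfree(NULL) is a no-op and free_alien_cache() is an
 * empty inline on !CONFIG_NUMA, so the same cleanup sequence builds
 * and runs on NUMA and non-NUMA kernels alike.
 */
static void cleanup_unused_caches(struct array_cache *shared,
				  struct array_cache **alien)
{
	kfree(shared);			/* NULL if it was installed */
	free_alien_cache(alien);	/* frees the alien array on NUMA only */
}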
@@ -935,6 +939,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
 				    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
+				/*
+				 * The l3s don't come and go as CPUs come and
+				 * go. cache_chain_mutex is sufficient
+				 * protection here.
+				 */
 				cachep->nodelists[node] = l3;
 			}
 
@@ -949,26 +958,47 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		   & array cache's */
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 
 			nc = alloc_arraycache(node, cachep->limit,
 						cachep->batchcount);
 			if (!nc)
 				goto bad;
+			shared = alloc_arraycache(node,
+					cachep->shared * cachep->batchcount,
+					0xbaadf00d);
+			if (!shared)
+				goto bad;
+#ifdef CONFIG_NUMA
+			alien = alloc_alien_cache(node, cachep->limit);
+			if (!alien)
+				goto bad;
+#endif
 			cachep->array[cpu] = nc;
 
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
-			if (!l3->shared) {
-				if (!(nc = alloc_arraycache(node,
-					cachep->shared *
-					cachep->batchcount,
-					0xbaadf00d)))
-					goto bad;
 
-				/* we are serialised from CPU_DEAD or
-				  CPU_UP_CANCELLED by the cpucontrol lock */
-				l3->shared = nc;
+			spin_lock_irq(&l3->list_lock);
+			if (!l3->shared) {
+				/*
+				 * We are serialised from CPU_DEAD or
+				 * CPU_UP_CANCELLED by the cpucontrol lock
+				 */
+				l3->shared = shared;
+				shared = NULL;
+			}
+#ifdef CONFIG_NUMA
+			if (!l3->alien) {
+				l3->alien = alien;
+				alien = NULL;
 			}
+#endif
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			free_alien_cache(alien);
 		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
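The CPU_UP_PREPARE path above follows an allocate-first, publish-under-the-lock pattern: the array caches are allocated (possibly sleeping) before l3->list_lock is taken, installed only if the slot is still empty, and freed otherwise. A condensed sketch of that idiom using the functions from the hunk (the helper name is hypothetical):

static int install_shared_cache(struct kmem_cache *cachep,
				struct kmem_list3 *l3, int node)
{
	struct array_cache *shared;

	/* Allocation can block, so it happens before taking list_lock. */
	shared = alloc_arraycache(node,
			cachep->shared * cachep->batchcount, 0xbaadf00d);
	if (!shared)
		return -ENOMEM;

	spin_lock_irq(&l3->list_lock);
	if (!l3->shared) {
		l3->shared = shared;	/* slot was empty: publish it */
		shared = NULL;
	}
	spin_unlock_irq(&l3->list_lock);

	kfree(shared);			/* NULL if it was installed above */
	return 0;
}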
@@ -977,25 +1007,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
+		/*
+		 * Even if all the cpus of a node are down, we don't free the
+		 * kmem_list3 of any cache. This to avoid a race between
+		 * cpu_down, and a kmalloc allocation from another cpu for
+		 * memory from the node of the cpu going down. The list3
+		 * structure is usually allocated from kmem_cache_create() and
+		 * gets destroyed at kmem_cache_destroy().
+		 */
 		/* fall thru */
 	case CPU_UP_CANCELED:
 		mutex_lock(&cache_chain_mutex);
 
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 			cpumask_t mask;
 
 			mask = node_to_cpumask(node);
-			spin_lock_irq(&cachep->spinlock);
 			/* cpu is dead; no one can alloc from it. */
 			nc = cachep->array[cpu];
 			cachep->array[cpu] = NULL;
 			l3 = cachep->nodelists[node];
 
 			if (!l3)
-				goto unlock_cache;
+				goto free_array_cache;
 
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1042,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
-				spin_unlock(&l3->list_lock);
-				goto unlock_cache;
+				spin_unlock_irq(&l3->list_lock);
+				goto free_array_cache;
 			}
 
-			if (l3->shared) {
+			shared = l3->shared;
+			if (shared) {
 				free_block(cachep, l3->shared->entry,
 					   l3->shared->avail, node);
-				kfree(l3->shared);
 				l3->shared = NULL;
 			}
-			if (l3->alien) {
-				drain_alien_cache(cachep, l3);
-				free_alien_cache(l3->alien);
-				l3->alien = NULL;
-			}
 
-			/* free slabs belonging to this node */
-			if (__node_shrink(cachep, node)) {
-				cachep->nodelists[node] = NULL;
-				spin_unlock(&l3->list_lock);
-				kfree(l3);
-			} else {
-				spin_unlock(&l3->list_lock);
+			alien = l3->alien;
+			l3->alien = NULL;
+
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			if (alien) {
+				drain_alien_cache(cachep, alien);
+				free_alien_cache(alien);
 			}
-unlock_cache:
-			spin_unlock_irq(&cachep->spinlock);
+free_array_cache:
 			kfree(nc);
 		}
+		/*
+		 * In the previous loop, all the objects were freed to
+		 * the respective cache's slabs, now we can go ahead and
+		 * shrink each nodelist to its limit.
+		 */
+		list_for_each_entry(cachep, &cache_chain, next) {
+			l3 = cachep->nodelists[node];
+			if (!l3)
+				continue;
+			spin_lock_irq(&l3->list_lock);
+			/* free slabs belonging to this node */
+			__node_shrink(cachep, node);
+			spin_unlock_irq(&l3->list_lock);
+		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
 #endif
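The CPU_DEAD/CPU_UP_CANCELED path now detaches the per-node caches while holding l3->list_lock and does the actual freeing after dropping it; kfree() and drain_alien_cache() (which takes the per-array_cache locks itself) run outside the list lock, and the old cachep->spinlock is gone from this path entirely. A sketch of that detach-then-free idiom as a hypothetical helper, mirroring the hunk above:

static void detach_node_caches(struct kmem_cache *cachep,
			       struct kmem_list3 *l3, int node)
{
	struct array_cache *shared;
	struct array_cache **alien;

	spin_lock_irq(&l3->list_lock);
	shared = l3->shared;
	if (shared) {
		/* free_block() is called with list_lock held, as above */
		free_block(cachep, shared->entry, shared->avail, node);
		l3->shared = NULL;
	}
	alien = l3->alien;
	l3->alien = NULL;
	spin_unlock_irq(&l3->list_lock);

	/* The freeing itself happens with list_lock dropped. */
	kfree(shared);
	if (alien) {
		drain_alien_cache(cachep, alien);
		free_alien_cache(alien);
	}
}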
@@ -1119,7 +1168,6 @@ void __init kmem_cache_init(void)
 		BUG();
 
 	cache_cache.colour = left_over / cache_cache.colour_off;
-	cache_cache.colour_next = 0;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
 
@@ -2011,18 +2059,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
 	smp_call_function_all_cpus(do_drain, cachep);
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
 		if (l3) {
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 			drain_array_locked(cachep, l3->shared, 1, node);
-			spin_unlock(&l3->list_lock);
+			spin_unlock_irq(&l3->list_lock);
 			if (l3->alien)
-				drain_alien_cache(cachep, l3);
+				drain_alien_cache(cachep, l3->alien);
 		}
 	}
-	spin_unlock_irq(&cachep->spinlock);
 }
 
 static int __node_shrink(struct kmem_cache *cachep, int node)
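drain_alien_cache() now takes the alien array directly and acquires each array_cache lock internally, so it is called after l3->list_lock has been released; the per-node list_lock with interrupts disabled replaces the old cache-wide cachep->spinlock around the loop. The per-node body of drain_cpu_caches() above boils down to the following sketch (the helper name is hypothetical):

static void drain_node_caches(struct kmem_cache *cachep, int node)
{
	struct kmem_list3 *l3 = cachep->nodelists[node];

	if (!l3)
		return;
	spin_lock_irq(&l3->list_lock);
	drain_array_locked(cachep, l3->shared, 1, node);
	spin_unlock_irq(&l3->list_lock);
	/* Called with list_lock released; it locks each alien ac itself. */
	if (l3->alien)
		drain_alien_cache(cachep, l3->alien);
}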
@@ -2324,20 +2370,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	ctor_flags |= SLAB_CTOR_ATOMIC;
 
-	/* About to mess with non-constant members - lock. */
+	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	l3 = cachep->nodelists[nodeid];
+	spin_lock(&l3->list_lock);
 
 	/* Get colour for the slab, and cal the next value. */
-	offset = cachep->colour_next;
-	cachep->colour_next++;
-	if (cachep->colour_next >= cachep->colour)
-		cachep->colour_next = 0;
-	offset *= cachep->colour_off;
+	offset = l3->colour_next;
+	l3->colour_next++;
+	if (l3->colour_next >= cachep->colour)
+		l3->colour_next = 0;
+	spin_unlock(&l3->list_lock);
 
-	spin_unlock(&cachep->spinlock);
+	offset *= cachep->colour_off;
 
-	check_irq_off();
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
 
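Slab colouring is now cycled per node under l3->list_lock instead of under the cache-wide spinlock. A worked sketch of the logic in the hunk above (helper name hypothetical; the numbers are assumed for illustration only): with cachep->colour_off == 64 and cachep->colour == 4, successive slabs grown on one node start at offsets 0, 64, 128, 192 and then wrap back to 0.

static size_t next_colour_offset(struct kmem_cache *cachep,
				 struct kmem_list3 *l3)
{
	size_t offset;

	/* Interrupts are already off in cache_grow(), hence plain spin_lock. */
	spin_lock(&l3->list_lock);
	offset = l3->colour_next++;
	if (l3->colour_next >= cachep->colour)
		l3->colour_next = 0;
	spin_unlock(&l3->list_lock);

	return offset * cachep->colour_off;
}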
@@ -2367,7 +2413,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2725,6 +2770,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	BUG_ON(!l3);
 
 retry:
+	check_irq_off();
 	spin_lock(&l3->list_lock);
 	entry = l3->slabs_partial.next;
 	if (entry == &l3->slabs_partial) {
@@ -3304,11 +3350,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
 	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 
 	for_each_online_cpu(i) {
 		struct array_cache *ccold = new.new[i];
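cachep->spinlock is now taken without disabling interrupts here: after this patch it appears to serialise only the tunables against process-context readers such as s_show(), while the interrupt-sensitive paths are covered by the per-node list_lock. A sketch of a matching reader (hypothetical helper, mirroring how s_show() takes the same lock below):

static void read_tunables(struct kmem_cache *cachep, int *limit,
			  int *batchcount, int *shared)
{
	spin_lock(&cachep->spinlock);	/* pairs with the writer above */
	*limit = cachep->limit;
	*batchcount = cachep->batchcount;
	*shared = cachep->shared;
	spin_unlock(&cachep->spinlock);
}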
@@ -3440,7 +3486,7 @@ static void cache_reap(void *unused)
 
 		l3 = searchp->nodelists[numa_node_id()];
 		if (l3->alien)
-			drain_alien_cache(searchp, l3);
+			drain_alien_cache(searchp, l3->alien);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3610,7 @@ static int s_show(struct seq_file *m, void *p)
 	int node;
 	struct kmem_list3 *l3;
 
-	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	active_objs = 0;
 	num_slabs = 0;
 	for_each_online_node(node) {
@@ -3573,7 +3618,8 @@ static int s_show(struct seq_file *m, void *p)
 		if (!l3)
 			continue;
 
-		spin_lock(&l3->list_lock);
+		check_irq_on();
+		spin_lock_irq(&l3->list_lock);
 
 		list_for_each(q, &l3->slabs_full) {
 			slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3644,10 @@ static int s_show(struct seq_file *m, void *p)
 			num_slabs++;
 		}
 		free_objects += l3->free_objects;
-		shared_avail += l3->shared->avail;
+		if (l3->shared)
+			shared_avail += l3->shared->avail;
 
-		spin_unlock(&l3->list_lock);
+		spin_unlock_irq(&l3->list_lock);
 	}
 	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
@@ -3644,7 +3691,7 @@ static int s_show(struct seq_file *m, void *p)
 	}
 #endif
 	seq_putc(m, '\n');
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 	return 0;
 }
 