path: root/mm/slab.c
Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c | 169
1 file changed, 108 insertions(+), 61 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 71370256a7eb..9cc049a942c6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
 	unsigned long next_reap;
 	int free_touched;
 	unsigned int free_limit;
+	unsigned int colour_next;	/* Per-node cache coloring */
 	spinlock_t list_lock;
 	struct array_cache *shared;	/* shared per node */
 	struct array_cache **alien;	/* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	INIT_LIST_HEAD(&parent->slabs_free);
 	parent->shared = NULL;
 	parent->alien = NULL;
+	parent->colour_next = 0;
 	spin_lock_init(&parent->list_lock);
 	parent->free_objects = 0;
 	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
-	unsigned int colour_next;	/* cache colouring */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
 	}
 }
 
-static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
 	int i = 0;
 	struct array_cache *ac;
 	unsigned long flags;
 
 	for_each_online_node(i) {
-		ac = l3->alien[i];
+		ac = alien[i];
 		if (ac) {
 			spin_lock_irqsave(&ac->lock, flags);
 			__drain_alien_cache(cachep, ac, i);
@@ -900,8 +901,11 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
 }
 #else
 #define alloc_alien_cache(node, limit) do { } while (0)
-#define free_alien_cache(ac_ptr) do { } while (0)
-#define drain_alien_cache(cachep, l3) do { } while (0)
+#define drain_alien_cache(cachep, alien) do { } while (0)
+
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
 #endif
 
 static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +939,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
 				    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
 
+				/*
+				 * The l3s don't come and go as CPUs come and
+				 * go. cache_chain_mutex is sufficient
+				 * protection here.
+				 */
 				cachep->nodelists[node] = l3;
 			}
 
@@ -949,26 +958,47 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		   & array cache's */
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 
 			nc = alloc_arraycache(node, cachep->limit,
 						cachep->batchcount);
 			if (!nc)
 				goto bad;
+			shared = alloc_arraycache(node,
+					cachep->shared * cachep->batchcount,
+					0xbaadf00d);
+			if (!shared)
+				goto bad;
+#ifdef CONFIG_NUMA
+			alien = alloc_alien_cache(node, cachep->limit);
+			if (!alien)
+				goto bad;
+#endif
 			cachep->array[cpu] = nc;
 
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
-			if (!l3->shared) {
-				if (!(nc = alloc_arraycache(node,
-					cachep->shared *
-					cachep->batchcount,
-					0xbaadf00d)))
-					goto bad;
 
-				/* we are serialised from CPU_DEAD or
-				   CPU_UP_CANCELLED by the cpucontrol lock */
-				l3->shared = nc;
+			spin_lock_irq(&l3->list_lock);
+			if (!l3->shared) {
+				/*
+				 * We are serialised from CPU_DEAD or
+				 * CPU_UP_CANCELLED by the cpucontrol lock
+				 */
+				l3->shared = shared;
+				shared = NULL;
+			}
+#ifdef CONFIG_NUMA
+			if (!l3->alien) {
+				l3->alien = alien;
+				alien = NULL;
 			}
+#endif
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			free_alien_cache(alien);
 		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
@@ -977,25 +1007,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DEAD:
+		/*
+		 * Even if all the cpus of a node are down, we don't free the
+		 * kmem_list3 of any cache. This to avoid a race between
+		 * cpu_down, and a kmalloc allocation from another cpu for
+		 * memory from the node of the cpu going down. The list3
+		 * structure is usually allocated from kmem_cache_create() and
+		 * gets destroyed at kmem_cache_destroy().
+		 */
 		/* fall thru */
 	case CPU_UP_CANCELED:
 		mutex_lock(&cache_chain_mutex);
 
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
+			struct array_cache *shared;
+			struct array_cache **alien;
 			cpumask_t mask;
 
 			mask = node_to_cpumask(node);
-			spin_lock_irq(&cachep->spinlock);
 			/* cpu is dead; no one can alloc from it. */
 			nc = cachep->array[cpu];
 			cachep->array[cpu] = NULL;
 			l3 = cachep->nodelists[node];
 
 			if (!l3)
-				goto unlock_cache;
+				goto free_array_cache;
 
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 
 			/* Free limit for this kmem_list3 */
 			l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1042,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			free_block(cachep, nc->entry, nc->avail, node);
 
 			if (!cpus_empty(mask)) {
-				spin_unlock(&l3->list_lock);
-				goto unlock_cache;
+				spin_unlock_irq(&l3->list_lock);
+				goto free_array_cache;
 			}
 
-			if (l3->shared) {
+			shared = l3->shared;
+			if (shared) {
 				free_block(cachep, l3->shared->entry,
 					   l3->shared->avail, node);
-				kfree(l3->shared);
 				l3->shared = NULL;
 			}
-			if (l3->alien) {
-				drain_alien_cache(cachep, l3);
-				free_alien_cache(l3->alien);
-				l3->alien = NULL;
-			}
 
-			/* free slabs belonging to this node */
-			if (__node_shrink(cachep, node)) {
-				cachep->nodelists[node] = NULL;
-				spin_unlock(&l3->list_lock);
-				kfree(l3);
-			} else {
-				spin_unlock(&l3->list_lock);
+			alien = l3->alien;
+			l3->alien = NULL;
+
+			spin_unlock_irq(&l3->list_lock);
+
+			kfree(shared);
+			if (alien) {
+				drain_alien_cache(cachep, alien);
+				free_alien_cache(alien);
 			}
-unlock_cache:
-			spin_unlock_irq(&cachep->spinlock);
+free_array_cache:
 			kfree(nc);
 		}
+		/*
+		 * In the previous loop, all the objects were freed to
+		 * the respective cache's slabs, now we can go ahead and
+		 * shrink each nodelist to its limit.
+		 */
+		list_for_each_entry(cachep, &cache_chain, next) {
+			l3 = cachep->nodelists[node];
+			if (!l3)
+				continue;
+			spin_lock_irq(&l3->list_lock);
+			/* free slabs belonging to this node */
+			__node_shrink(cachep, node);
+			spin_unlock_irq(&l3->list_lock);
+		}
 		mutex_unlock(&cache_chain_mutex);
 		break;
 #endif
@@ -1119,7 +1168,6 @@ void __init kmem_cache_init(void)
 		BUG();
 
 	cache_cache.colour = left_over / cache_cache.colour_off;
-	cache_cache.colour_next = 0;
 	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
 				      sizeof(struct slab), cache_line_size());
 
@@ -2011,18 +2059,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 
 	smp_call_function_all_cpus(do_drain, cachep);
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
 	for_each_online_node(node) {
 		l3 = cachep->nodelists[node];
 		if (l3) {
-			spin_lock(&l3->list_lock);
+			spin_lock_irq(&l3->list_lock);
 			drain_array_locked(cachep, l3->shared, 1, node);
-			spin_unlock(&l3->list_lock);
+			spin_unlock_irq(&l3->list_lock);
 			if (l3->alien)
-				drain_alien_cache(cachep, l3);
+				drain_alien_cache(cachep, l3->alien);
 		}
 	}
-	spin_unlock_irq(&cachep->spinlock);
 }
 
 static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2370,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	ctor_flags |= SLAB_CTOR_ATOMIC;
 
-	/* About to mess with non-constant members - lock. */
+	/* Take the l3 list lock to change the colour_next on this node */
 	check_irq_off();
-	spin_lock(&cachep->spinlock);
+	l3 = cachep->nodelists[nodeid];
+	spin_lock(&l3->list_lock);
 
 	/* Get colour for the slab, and cal the next value. */
-	offset = cachep->colour_next;
-	cachep->colour_next++;
-	if (cachep->colour_next >= cachep->colour)
-		cachep->colour_next = 0;
-	offset *= cachep->colour_off;
+	offset = l3->colour_next;
+	l3->colour_next++;
+	if (l3->colour_next >= cachep->colour)
+		l3->colour_next = 0;
+	spin_unlock(&l3->list_lock);
 
-	spin_unlock(&cachep->spinlock);
+	offset *= cachep->colour_off;
 
-	check_irq_off();
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
 
@@ -2367,7 +2413,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	check_irq_off();
-	l3 = cachep->nodelists[nodeid];
 	spin_lock(&l3->list_lock);
 
 	/* Make slab active. */
@@ -2725,6 +2770,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	BUG_ON(!l3);
 
 retry:
+	check_irq_off();
 	spin_lock(&l3->list_lock);
 	entry = l3->slabs_partial.next;
 	if (entry == &l3->slabs_partial) {
@@ -3304,11 +3350,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
 	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
 
 	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 
 	for_each_online_cpu(i) {
 		struct array_cache *ccold = new.new[i];
@@ -3440,7 +3486,7 @@ static void cache_reap(void *unused)
 
 		l3 = searchp->nodelists[numa_node_id()];
 		if (l3->alien)
-			drain_alien_cache(searchp, l3);
+			drain_alien_cache(searchp, l3->alien);
 		spin_lock_irq(&l3->list_lock);
 
 		drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3610,7 @@ static int s_show(struct seq_file *m, void *p)
 	int node;
 	struct kmem_list3 *l3;
 
-	check_irq_on();
-	spin_lock_irq(&cachep->spinlock);
+	spin_lock(&cachep->spinlock);
 	active_objs = 0;
 	num_slabs = 0;
 	for_each_online_node(node) {
@@ -3573,7 +3618,8 @@ static int s_show(struct seq_file *m, void *p)
 		if (!l3)
 			continue;
 
-		spin_lock(&l3->list_lock);
+		check_irq_on();
+		spin_lock_irq(&l3->list_lock);
 
 		list_for_each(q, &l3->slabs_full) {
 			slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3644,10 @@ static int s_show(struct seq_file *m, void *p)
 			num_slabs++;
 		}
 		free_objects += l3->free_objects;
-		shared_avail += l3->shared->avail;
+		if (l3->shared)
+			shared_avail += l3->shared->avail;
 
-		spin_unlock(&l3->list_lock);
+		spin_unlock_irq(&l3->list_lock);
 	}
 	num_slabs += active_slabs;
 	num_objs = num_slabs * cachep->num;
@@ -3644,7 +3691,7 @@ static int s_show(struct seq_file *m, void *p)
 	}
 #endif
 	seq_putc(m, '\n');
-	spin_unlock_irq(&cachep->spinlock);
+	spin_unlock(&cachep->spinlock);
 	return 0;
 }
 