aboutsummaryrefslogtreecommitdiffstats
path: root/mm/slab.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slab.c')
-rw-r--r--mm/slab.c176
1 files changed, 114 insertions, 62 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 71370256a7eb..d66c2b0d9715 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
294 unsigned long next_reap; 294 unsigned long next_reap;
295 int free_touched; 295 int free_touched;
296 unsigned int free_limit; 296 unsigned int free_limit;
297 unsigned int colour_next; /* Per-node cache coloring */
297 spinlock_t list_lock; 298 spinlock_t list_lock;
298 struct array_cache *shared; /* shared per node */ 299 struct array_cache *shared; /* shared per node */
299 struct array_cache **alien; /* on other nodes */ 300 struct array_cache **alien; /* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
344 INIT_LIST_HEAD(&parent->slabs_free); 345 INIT_LIST_HEAD(&parent->slabs_free);
345 parent->shared = NULL; 346 parent->shared = NULL;
346 parent->alien = NULL; 347 parent->alien = NULL;
348 parent->colour_next = 0;
347 spin_lock_init(&parent->list_lock); 349 spin_lock_init(&parent->list_lock);
348 parent->free_objects = 0; 350 parent->free_objects = 0;
349 parent->free_touched = 0; 351 parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
390 392
391 size_t colour; /* cache colouring range */ 393 size_t colour; /* cache colouring range */
392 unsigned int colour_off; /* colour offset */ 394 unsigned int colour_off; /* colour offset */
393 unsigned int colour_next; /* cache colouring */
394 struct kmem_cache *slabp_cache; 395 struct kmem_cache *slabp_cache;
395 unsigned int slab_size; 396 unsigned int slab_size;
396 unsigned int dflags; /* dynamic flags */ 397 unsigned int dflags; /* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
883 } 884 }
884} 885}
885 886
886static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3) 887static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
887{ 888{
888 int i = 0; 889 int i = 0;
889 struct array_cache *ac; 890 struct array_cache *ac;
890 unsigned long flags; 891 unsigned long flags;
891 892
892 for_each_online_node(i) { 893 for_each_online_node(i) {
893 ac = l3->alien[i]; 894 ac = alien[i];
894 if (ac) { 895 if (ac) {
895 spin_lock_irqsave(&ac->lock, flags); 896 spin_lock_irqsave(&ac->lock, flags);
896 __drain_alien_cache(cachep, ac, i); 897 __drain_alien_cache(cachep, ac, i);
@@ -899,9 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
899 } 900 }
900} 901}
901#else 902#else
902#define alloc_alien_cache(node, limit) do { } while (0) 903
903#define free_alien_cache(ac_ptr) do { } while (0) 904#define drain_alien_cache(cachep, alien) do { } while (0)
904#define drain_alien_cache(cachep, l3) do { } while (0) 905
906static inline struct array_cache **alloc_alien_cache(int node, int limit)
907{
908 return (struct array_cache **) 0x01020304ul;
909}
910
911static inline void free_alien_cache(struct array_cache **ac_ptr)
912{
913}
914
905#endif 915#endif
906 916
907static int __devinit cpuup_callback(struct notifier_block *nfb, 917static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +945,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
935 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 945 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
936 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 946 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
937 947
948 /*
949 * The l3s don't come and go as CPUs come and
950 * go. cache_chain_mutex is sufficient
951 * protection here.
952 */
938 cachep->nodelists[node] = l3; 953 cachep->nodelists[node] = l3;
939 } 954 }
940 955
@@ -949,26 +964,46 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
949 & array cache's */ 964 & array cache's */
950 list_for_each_entry(cachep, &cache_chain, next) { 965 list_for_each_entry(cachep, &cache_chain, next) {
951 struct array_cache *nc; 966 struct array_cache *nc;
967 struct array_cache *shared;
968 struct array_cache **alien;
952 969
953 nc = alloc_arraycache(node, cachep->limit, 970 nc = alloc_arraycache(node, cachep->limit,
954 cachep->batchcount); 971 cachep->batchcount);
955 if (!nc) 972 if (!nc)
956 goto bad; 973 goto bad;
974 shared = alloc_arraycache(node,
975 cachep->shared * cachep->batchcount,
976 0xbaadf00d);
977 if (!shared)
978 goto bad;
979
980 alien = alloc_alien_cache(node, cachep->limit);
981 if (!alien)
982 goto bad;
957 cachep->array[cpu] = nc; 983 cachep->array[cpu] = nc;
958 984
959 l3 = cachep->nodelists[node]; 985 l3 = cachep->nodelists[node];
960 BUG_ON(!l3); 986 BUG_ON(!l3);
961 if (!l3->shared) {
962 if (!(nc = alloc_arraycache(node,
963 cachep->shared *
964 cachep->batchcount,
965 0xbaadf00d)))
966 goto bad;
967 987
968 /* we are serialised from CPU_DEAD or 988 spin_lock_irq(&l3->list_lock);
969 CPU_UP_CANCELLED by the cpucontrol lock */ 989 if (!l3->shared) {
970 l3->shared = nc; 990 /*
991 * We are serialised from CPU_DEAD or
992 * CPU_UP_CANCELLED by the cpucontrol lock
993 */
994 l3->shared = shared;
995 shared = NULL;
996 }
997#ifdef CONFIG_NUMA
998 if (!l3->alien) {
999 l3->alien = alien;
1000 alien = NULL;
971 } 1001 }
1002#endif
1003 spin_unlock_irq(&l3->list_lock);
1004
1005 kfree(shared);
1006 free_alien_cache(alien);
972 } 1007 }
973 mutex_unlock(&cache_chain_mutex); 1008 mutex_unlock(&cache_chain_mutex);
974 break; 1009 break;
@@ -977,25 +1012,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
977 break; 1012 break;
978#ifdef CONFIG_HOTPLUG_CPU 1013#ifdef CONFIG_HOTPLUG_CPU
979 case CPU_DEAD: 1014 case CPU_DEAD:
1015 /*
1016 * Even if all the cpus of a node are down, we don't free the
1017 * kmem_list3 of any cache. This to avoid a race between
1018 * cpu_down, and a kmalloc allocation from another cpu for
1019 * memory from the node of the cpu going down. The list3
1020 * structure is usually allocated from kmem_cache_create() and
1021 * gets destroyed at kmem_cache_destroy().
1022 */
980 /* fall thru */ 1023 /* fall thru */
981 case CPU_UP_CANCELED: 1024 case CPU_UP_CANCELED:
982 mutex_lock(&cache_chain_mutex); 1025 mutex_lock(&cache_chain_mutex);
983 1026
984 list_for_each_entry(cachep, &cache_chain, next) { 1027 list_for_each_entry(cachep, &cache_chain, next) {
985 struct array_cache *nc; 1028 struct array_cache *nc;
1029 struct array_cache *shared;
1030 struct array_cache **alien;
986 cpumask_t mask; 1031 cpumask_t mask;
987 1032
988 mask = node_to_cpumask(node); 1033 mask = node_to_cpumask(node);
989 spin_lock_irq(&cachep->spinlock);
990 /* cpu is dead; no one can alloc from it. */ 1034 /* cpu is dead; no one can alloc from it. */
991 nc = cachep->array[cpu]; 1035 nc = cachep->array[cpu];
992 cachep->array[cpu] = NULL; 1036 cachep->array[cpu] = NULL;
993 l3 = cachep->nodelists[node]; 1037 l3 = cachep->nodelists[node];
994 1038
995 if (!l3) 1039 if (!l3)
996 goto unlock_cache; 1040 goto free_array_cache;
997 1041
998 spin_lock(&l3->list_lock); 1042 spin_lock_irq(&l3->list_lock);
999 1043
1000 /* Free limit for this kmem_list3 */ 1044 /* Free limit for this kmem_list3 */
1001 l3->free_limit -= cachep->batchcount; 1045 l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1047,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1003 free_block(cachep, nc->entry, nc->avail, node); 1047 free_block(cachep, nc->entry, nc->avail, node);
1004 1048
1005 if (!cpus_empty(mask)) { 1049 if (!cpus_empty(mask)) {
1006 spin_unlock(&l3->list_lock); 1050 spin_unlock_irq(&l3->list_lock);
1007 goto unlock_cache; 1051 goto free_array_cache;
1008 } 1052 }
1009 1053
1010 if (l3->shared) { 1054 shared = l3->shared;
1055 if (shared) {
1011 free_block(cachep, l3->shared->entry, 1056 free_block(cachep, l3->shared->entry,
1012 l3->shared->avail, node); 1057 l3->shared->avail, node);
1013 kfree(l3->shared);
1014 l3->shared = NULL; 1058 l3->shared = NULL;
1015 } 1059 }
1016 if (l3->alien) {
1017 drain_alien_cache(cachep, l3);
1018 free_alien_cache(l3->alien);
1019 l3->alien = NULL;
1020 }
1021 1060
1022 /* free slabs belonging to this node */ 1061 alien = l3->alien;
1023 if (__node_shrink(cachep, node)) { 1062 l3->alien = NULL;
1024 cachep->nodelists[node] = NULL; 1063
1025 spin_unlock(&l3->list_lock); 1064 spin_unlock_irq(&l3->list_lock);
1026 kfree(l3); 1065
1027 } else { 1066 kfree(shared);
1028 spin_unlock(&l3->list_lock); 1067 if (alien) {
1068 drain_alien_cache(cachep, alien);
1069 free_alien_cache(alien);
1029 } 1070 }
1030 unlock_cache: 1071free_array_cache:
1031 spin_unlock_irq(&cachep->spinlock);
1032 kfree(nc); 1072 kfree(nc);
1033 } 1073 }
1074 /*
1075 * In the previous loop, all the objects were freed to
1076 * the respective cache's slabs, now we can go ahead and
1077 * shrink each nodelist to its limit.
1078 */
1079 list_for_each_entry(cachep, &cache_chain, next) {
1080 l3 = cachep->nodelists[node];
1081 if (!l3)
1082 continue;
1083 spin_lock_irq(&l3->list_lock);
1084 /* free slabs belonging to this node */
1085 __node_shrink(cachep, node);
1086 spin_unlock_irq(&l3->list_lock);
1087 }
1034 mutex_unlock(&cache_chain_mutex); 1088 mutex_unlock(&cache_chain_mutex);
1035 break; 1089 break;
1036#endif 1090#endif
@@ -1119,7 +1173,6 @@ void __init kmem_cache_init(void)
1119 BUG(); 1173 BUG();
1120 1174
1121 cache_cache.colour = left_over / cache_cache.colour_off; 1175 cache_cache.colour = left_over / cache_cache.colour_off;
1122 cache_cache.colour_next = 0;
1123 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + 1176 cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1124 sizeof(struct slab), cache_line_size()); 1177 sizeof(struct slab), cache_line_size());
1125 1178
@@ -2011,18 +2064,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2011 2064
2012 smp_call_function_all_cpus(do_drain, cachep); 2065 smp_call_function_all_cpus(do_drain, cachep);
2013 check_irq_on(); 2066 check_irq_on();
2014 spin_lock_irq(&cachep->spinlock);
2015 for_each_online_node(node) { 2067 for_each_online_node(node) {
2016 l3 = cachep->nodelists[node]; 2068 l3 = cachep->nodelists[node];
2017 if (l3) { 2069 if (l3) {
2018 spin_lock(&l3->list_lock); 2070 spin_lock_irq(&l3->list_lock);
2019 drain_array_locked(cachep, l3->shared, 1, node); 2071 drain_array_locked(cachep, l3->shared, 1, node);
2020 spin_unlock(&l3->list_lock); 2072 spin_unlock_irq(&l3->list_lock);
2021 if (l3->alien) 2073 if (l3->alien)
2022 drain_alien_cache(cachep, l3); 2074 drain_alien_cache(cachep, l3->alien);
2023 } 2075 }
2024 } 2076 }
2025 spin_unlock_irq(&cachep->spinlock);
2026} 2077}
2027 2078
2028static int __node_shrink(struct kmem_cache *cachep, int node) 2079static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2375,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2324 */ 2375 */
2325 ctor_flags |= SLAB_CTOR_ATOMIC; 2376 ctor_flags |= SLAB_CTOR_ATOMIC;
2326 2377
2327 /* About to mess with non-constant members - lock. */ 2378 /* Take the l3 list lock to change the colour_next on this node */
2328 check_irq_off(); 2379 check_irq_off();
2329 spin_lock(&cachep->spinlock); 2380 l3 = cachep->nodelists[nodeid];
2381 spin_lock(&l3->list_lock);
2330 2382
2331 /* Get colour for the slab, and cal the next value. */ 2383 /* Get colour for the slab, and cal the next value. */
2332 offset = cachep->colour_next; 2384 offset = l3->colour_next;
2333 cachep->colour_next++; 2385 l3->colour_next++;
2334 if (cachep->colour_next >= cachep->colour) 2386 if (l3->colour_next >= cachep->colour)
2335 cachep->colour_next = 0; 2387 l3->colour_next = 0;
2336 offset *= cachep->colour_off; 2388 spin_unlock(&l3->list_lock);
2337 2389
2338 spin_unlock(&cachep->spinlock); 2390 offset *= cachep->colour_off;
2339 2391
2340 check_irq_off();
2341 if (local_flags & __GFP_WAIT) 2392 if (local_flags & __GFP_WAIT)
2342 local_irq_enable(); 2393 local_irq_enable();
2343 2394
@@ -2367,7 +2418,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2367 if (local_flags & __GFP_WAIT) 2418 if (local_flags & __GFP_WAIT)
2368 local_irq_disable(); 2419 local_irq_disable();
2369 check_irq_off(); 2420 check_irq_off();
2370 l3 = cachep->nodelists[nodeid];
2371 spin_lock(&l3->list_lock); 2421 spin_lock(&l3->list_lock);
2372 2422
2373 /* Make slab active. */ 2423 /* Make slab active. */
@@ -2725,6 +2775,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2725 BUG_ON(!l3); 2775 BUG_ON(!l3);
2726 2776
2727 retry: 2777 retry:
2778 check_irq_off();
2728 spin_lock(&l3->list_lock); 2779 spin_lock(&l3->list_lock);
2729 entry = l3->slabs_partial.next; 2780 entry = l3->slabs_partial.next;
2730 if (entry == &l3->slabs_partial) { 2781 if (entry == &l3->slabs_partial) {
@@ -3304,11 +3355,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
3304 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); 3355 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
3305 3356
3306 check_irq_on(); 3357 check_irq_on();
3307 spin_lock_irq(&cachep->spinlock); 3358 spin_lock(&cachep->spinlock);
3308 cachep->batchcount = batchcount; 3359 cachep->batchcount = batchcount;
3309 cachep->limit = limit; 3360 cachep->limit = limit;
3310 cachep->shared = shared; 3361 cachep->shared = shared;
3311 spin_unlock_irq(&cachep->spinlock); 3362 spin_unlock(&cachep->spinlock);
3312 3363
3313 for_each_online_cpu(i) { 3364 for_each_online_cpu(i) {
3314 struct array_cache *ccold = new.new[i]; 3365 struct array_cache *ccold = new.new[i];
@@ -3440,7 +3491,7 @@ static void cache_reap(void *unused)
3440 3491
3441 l3 = searchp->nodelists[numa_node_id()]; 3492 l3 = searchp->nodelists[numa_node_id()];
3442 if (l3->alien) 3493 if (l3->alien)
3443 drain_alien_cache(searchp, l3); 3494 drain_alien_cache(searchp, l3->alien);
3444 spin_lock_irq(&l3->list_lock); 3495 spin_lock_irq(&l3->list_lock);
3445 3496
3446 drain_array_locked(searchp, cpu_cache_get(searchp), 0, 3497 drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3615,7 @@ static int s_show(struct seq_file *m, void *p)
3564 int node; 3615 int node;
3565 struct kmem_list3 *l3; 3616 struct kmem_list3 *l3;
3566 3617
3567 check_irq_on(); 3618 spin_lock(&cachep->spinlock);
3568 spin_lock_irq(&cachep->spinlock);
3569 active_objs = 0; 3619 active_objs = 0;
3570 num_slabs = 0; 3620 num_slabs = 0;
3571 for_each_online_node(node) { 3621 for_each_online_node(node) {
@@ -3573,7 +3623,8 @@ static int s_show(struct seq_file *m, void *p)
3573 if (!l3) 3623 if (!l3)
3574 continue; 3624 continue;
3575 3625
3576 spin_lock(&l3->list_lock); 3626 check_irq_on();
3627 spin_lock_irq(&l3->list_lock);
3577 3628
3578 list_for_each(q, &l3->slabs_full) { 3629 list_for_each(q, &l3->slabs_full) {
3579 slabp = list_entry(q, struct slab, list); 3630 slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3649,10 @@ static int s_show(struct seq_file *m, void *p)
3598 num_slabs++; 3649 num_slabs++;
3599 } 3650 }
3600 free_objects += l3->free_objects; 3651 free_objects += l3->free_objects;
3601 shared_avail += l3->shared->avail; 3652 if (l3->shared)
3653 shared_avail += l3->shared->avail;
3602 3654
3603 spin_unlock(&l3->list_lock); 3655 spin_unlock_irq(&l3->list_lock);
3604 } 3656 }
3605 num_slabs += active_slabs; 3657 num_slabs += active_slabs;
3606 num_objs = num_slabs * cachep->num; 3658 num_objs = num_slabs * cachep->num;
@@ -3644,7 +3696,7 @@ static int s_show(struct seq_file *m, void *p)
3644 } 3696 }
3645#endif 3697#endif
3646 seq_putc(m, '\n'); 3698 seq_putc(m, '\n');
3647 spin_unlock_irq(&cachep->spinlock); 3699 spin_unlock(&cachep->spinlock);
3648 return 0; 3700 return 0;
3649} 3701}
3650 3702