1 files changed, 114 insertions, 62 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 71370256a7eb..d66c2b0d9715 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
        unsigned long next_reap;
        int free_touched;
        unsigned int free_limit;
+        unsigned int colour_next;       /* Per-node cache coloring */
        spinlock_t list_lock;
        struct array_cache *shared;     /* shared per node */
        struct array_cache **alien;     /* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
        INIT_LIST_HEAD(&parent->slabs_free);
        parent->shared = NULL;
        parent->alien = NULL;
+        parent->colour_next = 0;
        spin_lock_init(&parent->list_lock);
        parent->free_objects = 0;
        parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
        size_t colour;          /* cache colouring range */
        unsigned int colour_off;        /* colour offset */
-        unsigned int colour_next;       /* cache colouring */
        struct kmem_cache *slabp_cache;
        unsigned int slab_size;
        unsigned int dflags;    /* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
        }
 }
-static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
 {
        int i = 0;
        struct array_cache *ac;
        unsigned long flags;
        for_each_online_node(i) {
-                ac = l3->alien[i];
+                ac = alien[i];
                if (ac) {
                        spin_lock_irqsave(&ac->lock, flags);
                        __drain_alien_cache(cachep, ac, i);
@@ -899,9 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
        }
 }
 #else
-#define alloc_alien_cache(node, limit) do { } while (0)
-#define free_alien_cache(ac_ptr) do { } while (0)
+#define drain_alien_cache(cachep, alien) do { } while (0)
-#define drain_alien_cache(cachep, l3) do { } while (0)
+static inline struct array_cache **alloc_alien_cache(int node, int limit)
+{
+        return (struct array_cache **) 0x01020304ul;
+}
+static inline void free_alien_cache(struct array_cache **ac_ptr)
+{
+}
 #endif
 static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +945,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                                l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
                                    ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+                                /*
+                                 * The l3s don't come and go as CPUs come and
+                                 * go.  cache_chain_mutex is sufficient
+                                 * protection here.
+                                 */
                                cachep->nodelists[node] = l3;
                        }
@@ -949,26 +964,46 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                   & array cache's */
                list_for_each_entry(cachep, &cache_chain, next) {
                        struct array_cache *nc;
+                        struct array_cache *shared;
+                        struct array_cache **alien;
                        nc = alloc_arraycache(node, cachep->limit,
-                                              cachep->batchcount);
+                                                cachep->batchcount);
                        if (!nc)
                                goto bad;
+                        shared = alloc_arraycache(node,
+                                        cachep->shared * cachep->batchcount,
+                                        0xbaadf00d);
+                        if (!shared)
+                                goto bad;
+                        alien = alloc_alien_cache(node, cachep->limit);
+                        if (!alien)
+                                goto bad;
                        cachep->array[cpu] = nc;
                        l3 = cachep->nodelists[node];
                        BUG_ON(!l3);
-                        if (!l3->shared) {
-                                if (!(nc = alloc_arraycache(node,
-                                                            cachep->shared *
-                                                            cachep->batchcount,
-                                                            0xbaadf00d)))
-                                        goto bad;
-                                /* we are serialised from CPU_DEAD or
+                        spin_lock_irq(&l3->list_lock);
-                                   CPU_UP_CANCELLED by the cpucontrol lock */
+                        if (!l3->shared) {
-                                l3->shared = nc;
+                                /*
+                                 * We are serialised from CPU_DEAD or
+                                 * CPU_UP_CANCELLED by the cpucontrol lock
+                                 */
+                                l3->shared = shared;
+                                shared = NULL;
+                        }
+#ifdef CONFIG_NUMA
+                        if (!l3->alien) {
+                                l3->alien = alien;
+                                alien = NULL;
                        }
+#endif
+                        spin_unlock_irq(&l3->list_lock);
+                        kfree(shared);
+                        free_alien_cache(alien);
                }
                mutex_unlock(&cache_chain_mutex);
                break;
@@ -977,25 +1012,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                break;
 #ifdef CONFIG_HOTPLUG_CPU
        case CPU_DEAD:
+                /*
+                 * Even if all the cpus of a node are down, we don't free the
+                 * kmem_list3 of any cache. This to avoid a race between
+                 * cpu_down, and a kmalloc allocation from another cpu for
+                 * memory from the node of the cpu going down.  The list3
+                 * structure is usually allocated from kmem_cache_create() and
+                 * gets destroyed at kmem_cache_destroy().
+                 */
                /* fall thru */
        case CPU_UP_CANCELED:
                mutex_lock(&cache_chain_mutex);
                list_for_each_entry(cachep, &cache_chain, next) {
                        struct array_cache *nc;
+                        struct array_cache *shared;
+                        struct array_cache **alien;
                        cpumask_t mask;
                        mask = node_to_cpumask(node);
-                        spin_lock_irq(&cachep->spinlock);
                        /* cpu is dead; no one can alloc from it. */
                        nc = cachep->array[cpu];
                        cachep->array[cpu] = NULL;
                        l3 = cachep->nodelists[node];
                        if (!l3)
-                                goto unlock_cache;
+                                goto free_array_cache;
-                        spin_lock(&l3->list_lock);
+                        spin_lock_irq(&l3->list_lock);
                        /* Free limit for this kmem_list3 */
                        l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1047,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
                                free_block(cachep, nc->entry, nc->avail, node);
                        if (!cpus_empty(mask)) {
-                                spin_unlock(&l3->list_lock);
+                                spin_unlock_irq(&l3->list_lock);
-                                goto unlock_cache;
+                                goto free_array_cache;
                        }
-                        if (l3->shared) {
+                        shared = l3->shared;
+                        if (shared) {
                                free_block(cachep, l3->shared->entry,
                                           l3->shared->avail, node);
-                                kfree(l3->shared);
                                l3->shared = NULL;
                        }
-                        if (l3->alien) {
-                                drain_alien_cache(cachep, l3);
-                                free_alien_cache(l3->alien);
-                                l3->alien = NULL;
-                        }
-                        /* free slabs belonging to this node */
+                        alien = l3->alien;
-                        if (__node_shrink(cachep, node)) {
+                        l3->alien = NULL;
-                                cachep->nodelists[node] = NULL;
-                                spin_unlock(&l3->list_lock);
+                        spin_unlock_irq(&l3->list_lock);
-                                kfree(l3);
-                        } else {
+                        kfree(shared);
-                                spin_unlock(&l3->list_lock);
+                        if (alien) {
+                                drain_alien_cache(cachep, alien);
+                                free_alien_cache(alien);
                        }
-                      unlock_cache:
+free_array_cache:
-                        spin_unlock_irq(&cachep->spinlock);
                        kfree(nc);
                }
+                /*
+                 * In the previous loop, all the objects were freed to
+                 * the respective cache's slabs,  now we can go ahead and
+                 * shrink each nodelist to its limit.
+                 */
+                list_for_each_entry(cachep, &cache_chain, next) {
+                        l3 = cachep->nodelists[node];
+                        if (!l3)
+                                continue;
+                        spin_lock_irq(&l3->list_lock);
+                        /* free slabs belonging to this node */
+                        __node_shrink(cachep, node);
+                        spin_unlock_irq(&l3->list_lock);
+                }
                mutex_unlock(&cache_chain_mutex);
                break;
 #endif
@@ -1119,7 +1173,6 @@ void __init kmem_cache_init(void)
                BUG();
        cache_cache.colour = left_over / cache_cache.colour_off;
-        cache_cache.colour_next = 0;
        cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
                                      sizeof(struct slab), cache_line_size());
@@ -2011,18 +2064,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
        smp_call_function_all_cpus(do_drain, cachep);
        check_irq_on();
-        spin_lock_irq(&cachep->spinlock);
        for_each_online_node(node) {
                l3 = cachep->nodelists[node];
                if (l3) {
-                        spin_lock(&l3->list_lock);
+                        spin_lock_irq(&l3->list_lock);
                        drain_array_locked(cachep, l3->shared, 1, node);
-                        spin_unlock(&l3->list_lock);
+                        spin_unlock_irq(&l3->list_lock);
                        if (l3->alien)
-                                drain_alien_cache(cachep, l3);
+                                drain_alien_cache(cachep, l3->alien);
                }
        }
-        spin_unlock_irq(&cachep->spinlock);
 }
 static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2375,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 */
                ctor_flags |= SLAB_CTOR_ATOMIC;
-        /* About to mess with non-constant members - lock. */
+        /* Take the l3 list lock to change the colour_next on this node */
        check_irq_off();
-        spin_lock(&cachep->spinlock);
+        l3 = cachep->nodelists[nodeid];
+        spin_lock(&l3->list_lock);
        /* Get colour for the slab, and cal the next value. */
-        offset = cachep->colour_next;
+        offset = l3->colour_next;
-        cachep->colour_next++;
+        l3->colour_next++;
-        if (cachep->colour_next >= cachep->colour)
+        if (l3->colour_next >= cachep->colour)
-                cachep->colour_next = 0;
+                l3->colour_next = 0;
-        offset *= cachep->colour_off;
+        spin_unlock(&l3->list_lock);
-        spin_unlock(&cachep->spinlock);
+        offset *= cachep->colour_off;
-        check_irq_off();
        if (local_flags & __GFP_WAIT)
                local_irq_enable();
@@ -2367,7 +2418,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
        if (local_flags & __GFP_WAIT)
                local_irq_disable();
        check_irq_off();
-        l3 = cachep->nodelists[nodeid];
        spin_lock(&l3->list_lock);
        /* Make slab active. */
@@ -2725,6 +2775,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
        BUG_ON(!l3);
      retry:
+        check_irq_off();
        spin_lock(&l3->list_lock);
        entry = l3->slabs_partial.next;
        if (entry == &l3->slabs_partial) {
@@ -3304,11 +3355,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
        smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
        check_irq_on();
-        spin_lock_irq(&cachep->spinlock);
+        spin_lock(&cachep->spinlock);
        cachep->batchcount = batchcount;
        cachep->limit = limit;
        cachep->shared = shared;
-        spin_unlock_irq(&cachep->spinlock);
+        spin_unlock(&cachep->spinlock);
        for_each_online_cpu(i) {
                struct array_cache *ccold = new.new[i];
@@ -3440,7 +3491,7 @@ static void cache_reap(void *unused)
                l3 = searchp->nodelists[numa_node_id()];
                if (l3->alien)
-                        drain_alien_cache(searchp, l3);
+                        drain_alien_cache(searchp, l3->alien);
                spin_lock_irq(&l3->list_lock);
                drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3615,7 @@ static int s_show(struct seq_file *m, void *p)
        int node;
        struct kmem_list3 *l3;
-        check_irq_on();
+        spin_lock(&cachep->spinlock);
-        spin_lock_irq(&cachep->spinlock);
        active_objs = 0;
        num_slabs = 0;
        for_each_online_node(node) {
@@ -3573,7 +3623,8 @@ static int s_show(struct seq_file *m, void *p)
                if (!l3)
                        continue;
-                spin_lock(&l3->list_lock);
+                check_irq_on();
+                spin_lock_irq(&l3->list_lock);
                list_for_each(q, &l3->slabs_full) {
                        slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3649,10 @@ static int s_show(struct seq_file *m, void *p)
                        num_slabs++;
                }
                free_objects += l3->free_objects;
-                shared_avail += l3->shared->avail;
+                if (l3->shared)
+                        shared_avail += l3->shared->avail;
-                spin_unlock(&l3->list_lock);
+                spin_unlock_irq(&l3->list_lock);
        }
        num_slabs += active_slabs;
        num_objs = num_slabs * cachep->num;
@@ -3644,7 +3696,7 @@ static int s_show(struct seq_file *m, void *p)
        }
 #endif
        seq_putc(m, '\n');
-        spin_unlock_irq(&cachep->spinlock);
+        spin_unlock(&cachep->spinlock);
        return 0;
 }

diff --git a/mm/slab.c b/mm/slab.c index 71370256a7eb..d66c2b0d9715 100644 --- a/mm/slab.c +++ b/mm/slab.c
@@ -294,6 +294,7 @@ struct kmem_list3 {
294	unsigned long next_reap;	294	unsigned long next_reap;
295	int free_touched;	295	int free_touched;
296	unsigned int free_limit;	296	unsigned int free_limit;
		297	unsigned int colour_next; /* Per-node cache coloring */
297	spinlock_t list_lock;	298	spinlock_t list_lock;
298	struct array_cache *shared; /* shared per node */	299	struct array_cache *shared; /* shared per node */
299	struct array_cache **alien; /* on other nodes */	300	struct array_cache **alien; /* on other nodes */
@@ -344,6 +345,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
344	INIT_LIST_HEAD(&parent->slabs_free);	345	INIT_LIST_HEAD(&parent->slabs_free);
345	parent->shared = NULL;	346	parent->shared = NULL;
346	parent->alien = NULL;	347	parent->alien = NULL;
		348	parent->colour_next = 0;
347	spin_lock_init(&parent->list_lock);	349	spin_lock_init(&parent->list_lock);
348	parent->free_objects = 0;	350	parent->free_objects = 0;
349	parent->free_touched = 0;	351	parent->free_touched = 0;
@@ -390,7 +392,6 @@ struct kmem_cache {
390		392
391	size_t colour; /* cache colouring range */	393	size_t colour; /* cache colouring range */
392	unsigned int colour_off; /* colour offset */	394	unsigned int colour_off; /* colour offset */
393	unsigned int colour_next; /* cache colouring */
394	struct kmem_cache *slabp_cache;	395	struct kmem_cache *slabp_cache;
395	unsigned int slab_size;	396	unsigned int slab_size;
396	unsigned int dflags; /* dynamic flags */	397	unsigned int dflags; /* dynamic flags */
@@ -883,14 +884,14 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
883	}	884	}
884	}	885	}
885		886
886	static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)	887	static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
887	{	888	{
888	int i = 0;	889	int i = 0;
889	struct array_cache *ac;	890	struct array_cache *ac;
890	unsigned long flags;	891	unsigned long flags;
891		892
892	for_each_online_node(i) {	893	for_each_online_node(i) {
893	ac = l3->alien[i];	894	ac = alien[i];
894	if (ac) {	895	if (ac) {
895	spin_lock_irqsave(&ac->lock, flags);	896	spin_lock_irqsave(&ac->lock, flags);
896	__drain_alien_cache(cachep, ac, i);	897	__drain_alien_cache(cachep, ac, i);
@@ -899,9 +900,18 @@ static void drain_alien_cache(struct kmem_cache *cachep, struct kmem_list3 *l3)
899	}	900	}
900	}	901	}
901	#else	902	#else
902	#define alloc_alien_cache(node, limit) do { } while (0)	903
903	#define free_alien_cache(ac_ptr) do { } while (0)	904	#define drain_alien_cache(cachep, alien) do { } while (0)
904	#define drain_alien_cache(cachep, l3) do { } while (0)	905
		906	static inline struct array_cache **alloc_alien_cache(int node, int limit)
		907	{
		908	return (struct array_cache **) 0x01020304ul;
		909	}
		910
		911	static inline void free_alien_cache(struct array_cache **ac_ptr)
		912	{
		913	}
		914
905	#endif	915	#endif
906		916
907	static int __devinit cpuup_callback(struct notifier_block *nfb,	917	static int __devinit cpuup_callback(struct notifier_block *nfb,
@@ -935,6 +945,11 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
935	l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +	945	l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
936	((unsigned long)cachep) % REAPTIMEOUT_LIST3;	946	((unsigned long)cachep) % REAPTIMEOUT_LIST3;
937		947
		948	/*
		949	* The l3s don't come and go as CPUs come and
		950	* go. cache_chain_mutex is sufficient
		951	* protection here.
		952	*/
938	cachep->nodelists[node] = l3;	953	cachep->nodelists[node] = l3;
939	}	954	}
940		955
@@ -949,26 +964,46 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
949	& array cache's */	964	& array cache's */
950	list_for_each_entry(cachep, &cache_chain, next) {	965	list_for_each_entry(cachep, &cache_chain, next) {
951	struct array_cache *nc;	966	struct array_cache *nc;
		967	struct array_cache *shared;
		968	struct array_cache **alien;
952		969
953	nc = alloc_arraycache(node, cachep->limit,	970	nc = alloc_arraycache(node, cachep->limit,
954	cachep->batchcount);	971	cachep->batchcount);
955	if (!nc)	972	if (!nc)
956	goto bad;	973	goto bad;
		974	shared = alloc_arraycache(node,
		975	cachep->shared * cachep->batchcount,
		976	0xbaadf00d);
		977	if (!shared)
		978	goto bad;
		979
		980	alien = alloc_alien_cache(node, cachep->limit);
		981	if (!alien)
		982	goto bad;
957	cachep->array[cpu] = nc;	983	cachep->array[cpu] = nc;
958		984
959	l3 = cachep->nodelists[node];	985	l3 = cachep->nodelists[node];
960	BUG_ON(!l3);	986	BUG_ON(!l3);
961	if (!l3->shared) {
962	if (!(nc = alloc_arraycache(node,
963	cachep->shared *
964	cachep->batchcount,
965	0xbaadf00d)))
966	goto bad;
967		987
968	/* we are serialised from CPU_DEAD or	988	spin_lock_irq(&l3->list_lock);
969	CPU_UP_CANCELLED by the cpucontrol lock */	989	if (!l3->shared) {
970	l3->shared = nc;	990	/*
		991	* We are serialised from CPU_DEAD or
		992	* CPU_UP_CANCELLED by the cpucontrol lock
		993	*/
		994	l3->shared = shared;
		995	shared = NULL;
		996	}
		997	#ifdef CONFIG_NUMA
		998	if (!l3->alien) {
		999	l3->alien = alien;
		1000	alien = NULL;
971	}	1001	}
		1002	#endif
		1003	spin_unlock_irq(&l3->list_lock);
		1004
		1005	kfree(shared);
		1006	free_alien_cache(alien);
972	}	1007	}
973	mutex_unlock(&cache_chain_mutex);	1008	mutex_unlock(&cache_chain_mutex);
974	break;	1009	break;
@@ -977,25 +1012,34 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
977	break;	1012	break;
978	#ifdef CONFIG_HOTPLUG_CPU	1013	#ifdef CONFIG_HOTPLUG_CPU
979	case CPU_DEAD:	1014	case CPU_DEAD:
		1015	/*
		1016	* Even if all the cpus of a node are down, we don't free the
		1017	* kmem_list3 of any cache. This to avoid a race between
		1018	* cpu_down, and a kmalloc allocation from another cpu for
		1019	* memory from the node of the cpu going down. The list3
		1020	* structure is usually allocated from kmem_cache_create() and
		1021	* gets destroyed at kmem_cache_destroy().
		1022	*/
980	/* fall thru */	1023	/* fall thru */
981	case CPU_UP_CANCELED:	1024	case CPU_UP_CANCELED:
982	mutex_lock(&cache_chain_mutex);	1025	mutex_lock(&cache_chain_mutex);
983		1026
984	list_for_each_entry(cachep, &cache_chain, next) {	1027	list_for_each_entry(cachep, &cache_chain, next) {
985	struct array_cache *nc;	1028	struct array_cache *nc;
		1029	struct array_cache *shared;
		1030	struct array_cache **alien;
986	cpumask_t mask;	1031	cpumask_t mask;
987		1032
988	mask = node_to_cpumask(node);	1033	mask = node_to_cpumask(node);
989	spin_lock_irq(&cachep->spinlock);
990	/* cpu is dead; no one can alloc from it. */	1034	/* cpu is dead; no one can alloc from it. */
991	nc = cachep->array[cpu];	1035	nc = cachep->array[cpu];
992	cachep->array[cpu] = NULL;	1036	cachep->array[cpu] = NULL;
993	l3 = cachep->nodelists[node];	1037	l3 = cachep->nodelists[node];
994		1038
995	if (!l3)	1039	if (!l3)
996	goto unlock_cache;	1040	goto free_array_cache;
997		1041
998	spin_lock(&l3->list_lock);	1042	spin_lock_irq(&l3->list_lock);
999		1043
1000	/* Free limit for this kmem_list3 */	1044	/* Free limit for this kmem_list3 */
1001	l3->free_limit -= cachep->batchcount;	1045	l3->free_limit -= cachep->batchcount;
@@ -1003,34 +1047,44 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1003	free_block(cachep, nc->entry, nc->avail, node);	1047	free_block(cachep, nc->entry, nc->avail, node);
1004		1048
1005	if (!cpus_empty(mask)) {	1049	if (!cpus_empty(mask)) {
1006	spin_unlock(&l3->list_lock);	1050	spin_unlock_irq(&l3->list_lock);
1007	goto unlock_cache;	1051	goto free_array_cache;
1008	}	1052	}
1009		1053
1010	if (l3->shared) {	1054	shared = l3->shared;
		1055	if (shared) {
1011	free_block(cachep, l3->shared->entry,	1056	free_block(cachep, l3->shared->entry,
1012	l3->shared->avail, node);	1057	l3->shared->avail, node);
1013	kfree(l3->shared);
1014	l3->shared = NULL;	1058	l3->shared = NULL;
1015	}	1059	}
1016	if (l3->alien) {
1017	drain_alien_cache(cachep, l3);
1018	free_alien_cache(l3->alien);
1019	l3->alien = NULL;
1020	}
1021		1060
1022	/* free slabs belonging to this node */	1061	alien = l3->alien;
1023	if (__node_shrink(cachep, node)) {	1062	l3->alien = NULL;
1024	cachep->nodelists[node] = NULL;	1063
1025	spin_unlock(&l3->list_lock);	1064	spin_unlock_irq(&l3->list_lock);
1026	kfree(l3);	1065
1027	} else {	1066	kfree(shared);
1028	spin_unlock(&l3->list_lock);	1067	if (alien) {
		1068	drain_alien_cache(cachep, alien);
		1069	free_alien_cache(alien);
1029	}	1070	}
1030	unlock_cache:	1071	free_array_cache:
1031	spin_unlock_irq(&cachep->spinlock);
1032	kfree(nc);	1072	kfree(nc);
1033	}	1073	}
		1074	/*
		1075	* In the previous loop, all the objects were freed to
		1076	* the respective cache's slabs, now we can go ahead and
		1077	* shrink each nodelist to its limit.
		1078	*/
		1079	list_for_each_entry(cachep, &cache_chain, next) {
		1080	l3 = cachep->nodelists[node];
		1081	if (!l3)
		1082	continue;
		1083	spin_lock_irq(&l3->list_lock);
		1084	/* free slabs belonging to this node */
		1085	__node_shrink(cachep, node);
		1086	spin_unlock_irq(&l3->list_lock);
		1087	}
1034	mutex_unlock(&cache_chain_mutex);	1088	mutex_unlock(&cache_chain_mutex);
1035	break;	1089	break;
1036	#endif	1090	#endif
@@ -1119,7 +1173,6 @@ void __init kmem_cache_init(void)
1119	BUG();	1173	BUG();
1120		1174
1121	cache_cache.colour = left_over / cache_cache.colour_off;	1175	cache_cache.colour = left_over / cache_cache.colour_off;
1122	cache_cache.colour_next = 0;
1123	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +	1176	cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
1124	sizeof(struct slab), cache_line_size());	1177	sizeof(struct slab), cache_line_size());
1125		1178
@@ -2011,18 +2064,16 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2011		2064
2012	smp_call_function_all_cpus(do_drain, cachep);	2065	smp_call_function_all_cpus(do_drain, cachep);
2013	check_irq_on();	2066	check_irq_on();
2014	spin_lock_irq(&cachep->spinlock);
2015	for_each_online_node(node) {	2067	for_each_online_node(node) {
2016	l3 = cachep->nodelists[node];	2068	l3 = cachep->nodelists[node];
2017	if (l3) {	2069	if (l3) {
2018	spin_lock(&l3->list_lock);	2070	spin_lock_irq(&l3->list_lock);
2019	drain_array_locked(cachep, l3->shared, 1, node);	2071	drain_array_locked(cachep, l3->shared, 1, node);
2020	spin_unlock(&l3->list_lock);	2072	spin_unlock_irq(&l3->list_lock);
2021	if (l3->alien)	2073	if (l3->alien)
2022	drain_alien_cache(cachep, l3);	2074	drain_alien_cache(cachep, l3->alien);
2023	}	2075	}
2024	}	2076	}
2025	spin_unlock_irq(&cachep->spinlock);
2026	}	2077	}
2027		2078
2028	static int __node_shrink(struct kmem_cache *cachep, int node)	2079	static int __node_shrink(struct kmem_cache *cachep, int node)
@@ -2324,20 +2375,20 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2324	*/	2375	*/
2325	ctor_flags |= SLAB_CTOR_ATOMIC;	2376	ctor_flags |= SLAB_CTOR_ATOMIC;
2326		2377
2327	/* About to mess with non-constant members - lock. */	2378	/* Take the l3 list lock to change the colour_next on this node */
2328	check_irq_off();	2379	check_irq_off();
2329	spin_lock(&cachep->spinlock);	2380	l3 = cachep->nodelists[nodeid];
		2381	spin_lock(&l3->list_lock);
2330		2382
2331	/* Get colour for the slab, and cal the next value. */	2383	/* Get colour for the slab, and cal the next value. */
2332	offset = cachep->colour_next;	2384	offset = l3->colour_next;
2333	cachep->colour_next++;	2385	l3->colour_next++;
2334	if (cachep->colour_next >= cachep->colour)	2386	if (l3->colour_next >= cachep->colour)
2335	cachep->colour_next = 0;	2387	l3->colour_next = 0;
2336	offset *= cachep->colour_off;	2388	spin_unlock(&l3->list_lock);
2337		2389
2338	spin_unlock(&cachep->spinlock);	2390	offset *= cachep->colour_off;
2339		2391
2340	check_irq_off();
2341	if (local_flags & __GFP_WAIT)	2392	if (local_flags & __GFP_WAIT)
2342	local_irq_enable();	2393	local_irq_enable();
2343		2394
@@ -2367,7 +2418,6 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2367	if (local_flags & __GFP_WAIT)	2418	if (local_flags & __GFP_WAIT)
2368	local_irq_disable();	2419	local_irq_disable();
2369	check_irq_off();	2420	check_irq_off();
2370	l3 = cachep->nodelists[nodeid];
2371	spin_lock(&l3->list_lock);	2421	spin_lock(&l3->list_lock);
2372		2422
2373	/* Make slab active. */	2423	/* Make slab active. */
@@ -2725,6 +2775,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2725	BUG_ON(!l3);	2775	BUG_ON(!l3);
2726		2776
2727	retry:	2777	retry:
		2778	check_irq_off();
2728	spin_lock(&l3->list_lock);	2779	spin_lock(&l3->list_lock);
2729	entry = l3->slabs_partial.next;	2780	entry = l3->slabs_partial.next;
2730	if (entry == &l3->slabs_partial) {	2781	if (entry == &l3->slabs_partial) {
@@ -3304,11 +3355,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
3304	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);	3355	smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
3305		3356
3306	check_irq_on();	3357	check_irq_on();
3307	spin_lock_irq(&cachep->spinlock);	3358	spin_lock(&cachep->spinlock);
3308	cachep->batchcount = batchcount;	3359	cachep->batchcount = batchcount;
3309	cachep->limit = limit;	3360	cachep->limit = limit;
3310	cachep->shared = shared;	3361	cachep->shared = shared;
3311	spin_unlock_irq(&cachep->spinlock);	3362	spin_unlock(&cachep->spinlock);
3312		3363
3313	for_each_online_cpu(i) {	3364	for_each_online_cpu(i) {
3314	struct array_cache *ccold = new.new[i];	3365	struct array_cache *ccold = new.new[i];
@@ -3440,7 +3491,7 @@ static void cache_reap(void *unused)
3440		3491
3441	l3 = searchp->nodelists[numa_node_id()];	3492	l3 = searchp->nodelists[numa_node_id()];
3442	if (l3->alien)	3493	if (l3->alien)
3443	drain_alien_cache(searchp, l3);	3494	drain_alien_cache(searchp, l3->alien);
3444	spin_lock_irq(&l3->list_lock);	3495	spin_lock_irq(&l3->list_lock);
3445		3496
3446	drain_array_locked(searchp, cpu_cache_get(searchp), 0,	3497	drain_array_locked(searchp, cpu_cache_get(searchp), 0,
@@ -3564,8 +3615,7 @@ static int s_show(struct seq_file *m, void *p)
3564	int node;	3615	int node;
3565	struct kmem_list3 *l3;	3616	struct kmem_list3 *l3;
3566		3617
3567	check_irq_on();	3618	spin_lock(&cachep->spinlock);
3568	spin_lock_irq(&cachep->spinlock);
3569	active_objs = 0;	3619	active_objs = 0;
3570	num_slabs = 0;	3620	num_slabs = 0;
3571	for_each_online_node(node) {	3621	for_each_online_node(node) {
@@ -3573,7 +3623,8 @@ static int s_show(struct seq_file *m, void *p)
3573	if (!l3)	3623	if (!l3)
3574	continue;	3624	continue;
3575		3625
3576	spin_lock(&l3->list_lock);	3626	check_irq_on();
		3627	spin_lock_irq(&l3->list_lock);
3577		3628
3578	list_for_each(q, &l3->slabs_full) {	3629	list_for_each(q, &l3->slabs_full) {
3579	slabp = list_entry(q, struct slab, list);	3630	slabp = list_entry(q, struct slab, list);
@@ -3598,9 +3649,10 @@ static int s_show(struct seq_file *m, void *p)
3598	num_slabs++;	3649	num_slabs++;
3599	}	3650	}
3600	free_objects += l3->free_objects;	3651	free_objects += l3->free_objects;
3601	shared_avail += l3->shared->avail;	3652	if (l3->shared)
		3653	shared_avail += l3->shared->avail;
3602		3654
3603	spin_unlock(&l3->list_lock);	3655	spin_unlock_irq(&l3->list_lock);
3604	}	3656	}
3605	num_slabs += active_slabs;	3657	num_slabs += active_slabs;
3606	num_objs = num_slabs * cachep->num;	3658	num_objs = num_slabs * cachep->num;
@@ -3644,7 +3696,7 @@ static int s_show(struct seq_file *m, void *p)
3644	}	3696	}
3645	#endif	3697	#endif
3646	seq_putc(m, '\n');	3698	seq_putc(m, '\n');
3647	spin_unlock_irq(&cachep->spinlock);	3699	spin_unlock(&cachep->spinlock);
3648	return 0;	3700	return 0;
3649	}	3701	}
3650		3702