author      KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>   2009-04-02 19:57:33 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>       2009-04-02 22:04:55 -0400
commit      04046e1a0a34286382e913f8fc461440c21d88e8 (patch)
tree        cab2b8a61e7474d509fbd3ea02e38b7c4137ce4b
parent      b4046f00ee7c1e5615261b496cf7309683275b29 (diff)
memcg: use CSS ID
Assign a CSS ID to each memcg and use css_get_next() for scanning the hierarchy.

Assume the following tree:
    group_A (ID=3)
        /01 (ID=4)
            /0A (ID=7)
        /02 (ID=10)
    group_B (ID=5)
and a task in group_A/01/0A hits the limit at group_A.
Reclaim will be done in the following order (round-robin):
    group_A (3) -> group_A/01 (4) -> group_A/01/0A (7) -> group_A/02 (10)
    -> group_A -> .....
Round-robin by ID. The last visited cgroup is recorded, and the scan
restarts from it when reclaim starts again.
(A smarter algorithm could be implemented...)
No cgroup_mutex or hierarchy_mutex is required.
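
For illustration, a minimal user-space C sketch of that round-robin walk
(not kernel code: the "ids" array and get_next_id() are hypothetical
stand-ins for the CSS IDs that css_get_next() reports for group_A's
subtree; only the caching of last_scanned_child mirrors the patch):

/*
 * User-space sketch only -- NOT kernel code.  "ids" stands in for the
 * CSS IDs css_get_next() would report for group_A's subtree, and
 * get_next_id() is a hypothetical stand-in for that lookup.
 */
#include <stdio.h>

static const int ids[] = {3, 4, 7, 10};   /* group_A and descendants */
static const int nr_ids = sizeof(ids) / sizeof(ids[0]);
static int last_scanned_child;            /* 0 means "scan from ID:1" */

/* smallest ID >= nextid, or 0 when the subtree is exhausted */
static int get_next_id(int nextid)
{
        int i;

        for (i = 0; i < nr_ids; i++)
                if (ids[i] >= nextid)
                        return ids[i];
        return 0;
}

static int select_victim(void)
{
        int found;

        for (;;) {
                found = get_next_id(last_scanned_child + 1);
                last_scanned_child = found;   /* 0 restarts the scan */
                if (found)
                        return found;
        }
}

int main(void)
{
        int i;

        /* prints IDs 3 4 7 10 3 4 7 10: round-robin over the subtree */
        for (i = 0; i < 8; i++)
                printf("reclaim from ID %d\n", select_victim());
        return 0;
}

mem_cgroup_select_victim() in the diff below does the same against the
live cgroup tree, with RCU and reclaim_param_lock protecting the walk.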
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--   mm/memcontrol.c   220
1 file changed, 82 insertions(+), 138 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ffec674c5ac..61fd9590c135 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -95,6 +95,15 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat,
 	return ret;
 }
 
+static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat)
+{
+	s64 ret;
+
+	ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE);
+	ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS);
+	return ret;
+}
+
 /*
  * per-zone information in memory controller.
  */
@@ -154,9 +163,9 @@ struct mem_cgroup {
 
 	/*
 	 * While reclaiming in a hiearchy, we cache the last child we
-	 * reclaimed from. Protected by hierarchy_mutex
+	 * reclaimed from.
 	 */
-	struct mem_cgroup *last_scanned_child;
+	int last_scanned_child;
 	/*
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
@@ -629,103 +638,6 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
 #define mem_cgroup_from_res_counter(counter, member)	\
 	container_of(counter, struct mem_cgroup, member)
 
-/*
- * This routine finds the DFS walk successor. This routine should be
- * called with hierarchy_mutex held
- */
-static struct mem_cgroup *
-__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem)
-{
-	struct cgroup *cgroup, *curr_cgroup, *root_cgroup;
-
-	curr_cgroup = curr->css.cgroup;
-	root_cgroup = root_mem->css.cgroup;
-
-	if (!list_empty(&curr_cgroup->children)) {
-		/*
-		 * Walk down to children
-		 */
-		cgroup = list_entry(curr_cgroup->children.next,
-				struct cgroup, sibling);
-		curr = mem_cgroup_from_cont(cgroup);
-		goto done;
-	}
-
-visit_parent:
-	if (curr_cgroup == root_cgroup) {
-		/* caller handles NULL case */
-		curr = NULL;
-		goto done;
-	}
-
-	/*
-	 * Goto next sibling
-	 */
-	if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) {
-		cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup,
-				sibling);
-		curr = mem_cgroup_from_cont(cgroup);
-		goto done;
-	}
-
-	/*
-	 * Go up to next parent and next parent's sibling if need be
-	 */
-	curr_cgroup = curr_cgroup->parent;
-	goto visit_parent;
-
-done:
-	return curr;
-}
-
-/*
- * Visit the first child (need not be the first child as per the ordering
- * of the cgroup list, since we track last_scanned_child) of @mem and use
- * that to reclaim free pages from.
- */
-static struct mem_cgroup *
-mem_cgroup_get_next_node(struct mem_cgroup *root_mem)
-{
-	struct cgroup *cgroup;
-	struct mem_cgroup *orig, *next;
-	bool obsolete;
-
-	/*
-	 * Scan all children under the mem_cgroup mem
-	 */
-	mutex_lock(&mem_cgroup_subsys.hierarchy_mutex);
-
-	orig = root_mem->last_scanned_child;
-	obsolete = mem_cgroup_is_obsolete(orig);
-
-	if (list_empty(&root_mem->css.cgroup->children)) {
-		/*
-		 * root_mem might have children before and last_scanned_child
-		 * may point to one of them. We put it later.
-		 */
-		if (orig)
-			VM_BUG_ON(!obsolete);
-		next = NULL;
-		goto done;
-	}
-
-	if (!orig || obsolete) {
-		cgroup = list_first_entry(&root_mem->css.cgroup->children,
-				struct cgroup, sibling);
-		next = mem_cgroup_from_cont(cgroup);
-	} else
-		next = __mem_cgroup_get_next_node(orig, root_mem);
-
-done:
-	if (next)
-		mem_cgroup_get(next);
-	root_mem->last_scanned_child = next;
-	if (orig)
-		mem_cgroup_put(orig);
-	mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex);
-	return (next) ? next : root_mem;
-}
-
 static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
 {
 	if (do_swap_account) {
@@ -755,46 +667,79 @@ static unsigned int get_swappiness(struct mem_cgroup *memcg)
 }
 
 /*
- * Dance down the hierarchy if needed to reclaim memory. We remember the
- * last child we reclaimed from, so that we don't end up penalizing
- * one child extensively based on its position in the children list.
+ * Visit the first child (need not be the first child as per the ordering
+ * of the cgroup list, since we track last_scanned_child) of @mem and use
+ * that to reclaim free pages from.
+ */
+static struct mem_cgroup *
+mem_cgroup_select_victim(struct mem_cgroup *root_mem)
+{
+	struct mem_cgroup *ret = NULL;
+	struct cgroup_subsys_state *css;
+	int nextid, found;
+
+	if (!root_mem->use_hierarchy) {
+		css_get(&root_mem->css);
+		ret = root_mem;
+	}
+
+	while (!ret) {
+		rcu_read_lock();
+		nextid = root_mem->last_scanned_child + 1;
+		css = css_get_next(&mem_cgroup_subsys, nextid, &root_mem->css,
+				   &found);
+		if (css && css_tryget(css))
+			ret = container_of(css, struct mem_cgroup, css);
+
+		rcu_read_unlock();
+		/* Updates scanning parameter */
+		spin_lock(&root_mem->reclaim_param_lock);
+		if (!css) {
+			/* this means start scan from ID:1 */
+			root_mem->last_scanned_child = 0;
+		} else
+			root_mem->last_scanned_child = found;
+		spin_unlock(&root_mem->reclaim_param_lock);
+	}
+
+	return ret;
+}
+
+/*
+ * Scan the hierarchy if needed to reclaim memory. We remember the last child
+ * we reclaimed from, so that we don't end up penalizing one child extensively
+ * based on its position in the children list.
  *
  * root_mem is the original ancestor that we've been reclaim from.
+ *
+ * We give up and return to the caller when we visit root_mem twice.
+ * (other groups can be removed while we're walking....)
  */
 static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem,
 					   gfp_t gfp_mask, bool noswap)
 {
-	struct mem_cgroup *next_mem;
-	int ret = 0;
-
-	/*
-	 * Reclaim unconditionally and don't check for return value.
-	 * We need to reclaim in the current group and down the tree.
-	 * One might think about checking for children before reclaiming,
-	 * but there might be left over accounting, even after children
-	 * have left.
-	 */
-	ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap,
-					   get_swappiness(root_mem));
-	if (mem_cgroup_check_under_limit(root_mem))
-		return 1;	/* indicate reclaim has succeeded */
-	if (!root_mem->use_hierarchy)
-		return ret;
-
-	next_mem = mem_cgroup_get_next_node(root_mem);
-
-	while (next_mem != root_mem) {
-		if (mem_cgroup_is_obsolete(next_mem)) {
-			next_mem = mem_cgroup_get_next_node(root_mem);
+	struct mem_cgroup *victim;
+	int ret, total = 0;
+	int loop = 0;
+
+	while (loop < 2) {
+		victim = mem_cgroup_select_victim(root_mem);
+		if (victim == root_mem)
+			loop++;
+		if (!mem_cgroup_local_usage(&victim->stat)) {
+			/* this cgroup's local usage == 0 */
+			css_put(&victim->css);
 			continue;
 		}
-		ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap,
-						   get_swappiness(next_mem));
+		/* we use swappiness of local cgroup */
+		ret = try_to_free_mem_cgroup_pages(victim, gfp_mask, noswap,
+						   get_swappiness(victim));
+		css_put(&victim->css);
+		total += ret;
 		if (mem_cgroup_check_under_limit(root_mem))
-			return 1;	/* indicate reclaim has succeeded */
-		next_mem = mem_cgroup_get_next_node(root_mem);
+			return 1 + total;
 	}
-	return ret;
+	return total;
 }
 
 bool mem_cgroup_oom_called(struct task_struct *task)
@@ -1324,8 +1269,8 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype)
 	res_counter_uncharge(&mem->res, PAGE_SIZE);
 	if (do_swap_account && (ctype != MEM_CGROUP_CHARGE_TYPE_SWAPOUT))
 		res_counter_uncharge(&mem->memsw, PAGE_SIZE);
-
 	mem_cgroup_charge_statistics(mem, pc, false);
+
 	ClearPageCgroupUsed(pc);
 	/*
 	 * pc->mem_cgroup is not cleared here. It will be accessed when it's
@@ -2178,6 +2123,8 @@ static void __mem_cgroup_free(struct mem_cgroup *mem)
 {
 	int node;
 
+	free_css_id(&mem_cgroup_subsys, &mem->css);
+
 	for_each_node_state(node, N_POSSIBLE)
 		free_mem_cgroup_per_zone_info(mem, node);
 
@@ -2228,11 +2175,12 @@ static struct cgroup_subsys_state * __ref
 mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 {
 	struct mem_cgroup *mem, *parent;
+	long error = -ENOMEM;
 	int node;
 
 	mem = mem_cgroup_alloc();
 	if (!mem)
-		return ERR_PTR(-ENOMEM);
+		return ERR_PTR(error);
 
 	for_each_node_state(node, N_POSSIBLE)
 		if (alloc_mem_cgroup_per_zone_info(mem, node))
@@ -2260,7 +2208,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 		res_counter_init(&mem->res, NULL);
 		res_counter_init(&mem->memsw, NULL);
 	}
-	mem->last_scanned_child = NULL;
+	mem->last_scanned_child = 0;
 	spin_lock_init(&mem->reclaim_param_lock);
 
 	if (parent)
@@ -2269,7 +2217,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
 	return &mem->css;
 free_out:
 	__mem_cgroup_free(mem);
-	return ERR_PTR(-ENOMEM);
+	return ERR_PTR(error);
 }
 
 static int mem_cgroup_pre_destroy(struct cgroup_subsys *ss,
@@ -2284,12 +2232,7 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 					struct cgroup *cont)
 {
 	struct mem_cgroup *mem = mem_cgroup_from_cont(cont);
-	struct mem_cgroup *last_scanned_child = mem->last_scanned_child;
 
-	if (last_scanned_child) {
-		VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child));
-		mem_cgroup_put(last_scanned_child);
-	}
 	mem_cgroup_put(mem);
 }
 
@@ -2328,6 +2271,7 @@ struct cgroup_subsys mem_cgroup_subsys = {
 	.populate = mem_cgroup_populate,
 	.attach = mem_cgroup_move_task,
 	.early_init = 0,
+	.use_id = 1,
 };
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP