author     KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2008-02-07 03:14:08 -0500
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-02-07 11:42:19 -0500
commit     9175e0311ec9e6d1bf1f6dfecf9268baf08765e6
tree       36ca9f47f8bc18345980270dc9d355f7c107cf6e /mm
parent     f1a9ee758de7de1e040de849fdef46e6802ea117
bugfix for memory controller: add helper functions for assigning a cgroup to a page
This patch adds the following helper functions:
  - clear_page_cgroup(page, pc)
  - page_cgroup_assign_new_page_cgroup(page, pc)
These are mainly for cleanup.

The "check page->cgroup again after lock_page_cgroup()" pattern is
implemented in a straightforward way.

A comment in mem_cgroup_uncharge() will be removed by the force-empty patch.
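For illustration, both helpers wrap the same idiom: sample page->cgroup,
take the bit lock, then re-check before touching the field. A minimal
sketch of that pattern (not part of the patch; "expected" and "new_pc"
are hypothetical placeholders) looks like:

	lock_page_cgroup(page);
	/*
	 * Re-check under the lock: another context may have changed
	 * page->page_cgroup between our first look and lock acquisition.
	 */
	if (page_get_page_cgroup(page) != expected) {
		unlock_page_cgroup(page);	/* lost the race: back out or retry */
		return 1;
	}
	page_assign_page_cgroup(page, new_pc);	/* safe while the lock is held */
	unlock_page_cgroup(page);
	return 0;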
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: David Rientjes <rientjes@google.com>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	105
1 file changed, 76 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2fadd4896a14..3270ce7375db 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -162,6 +162,48 @@ static void __always_inline unlock_page_cgroup(struct page *page)
 	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
 
+/*
+ * Tie a new page_cgroup to a struct page under lock_page_cgroup().
+ * This can fail if the page is already tied to a page_cgroup.
+ * On success, returns 0.
+ */
+static inline int
+page_cgroup_assign_new_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	int ret = 0;
+
+	lock_page_cgroup(page);
+	if (!page_get_page_cgroup(page))
+		page_assign_page_cgroup(page, pc);
+	else /* The page is tied to another pc. */
+		ret = 1;
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+/*
+ * Clear the page->page_cgroup member under lock_page_cgroup().
+ * If the given "pc" value differs from the current page->page_cgroup,
+ * page->page_cgroup is not cleared.
+ * Returns the value of page->page_cgroup at the time the lock was taken.
+ * A caller can detect failure of clearing by checking
+ *	clear_page_cgroup(page, pc) == pc
+ */
+
+static inline struct page_cgroup *
+clear_page_cgroup(struct page *page, struct page_cgroup *pc)
+{
+	struct page_cgroup *ret;
+	/* lock and clear */
+	lock_page_cgroup(page);
+	ret = page_get_page_cgroup(page);
+	if (likely(ret == pc))
+		page_assign_page_cgroup(page, NULL);
+	unlock_page_cgroup(page);
+	return ret;
+}
+
+
 static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
 {
 	if (active)
@@ -270,7 +312,7 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask)
 {
 	struct mem_cgroup *mem;
-	struct page_cgroup *pc, *race_pc;
+	struct page_cgroup *pc;
 	unsigned long flags;
 	unsigned long nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
 
@@ -293,8 +335,10 @@ retry:
 			unlock_page_cgroup(page);
 			cpu_relax();
 			goto retry;
-		} else
+		} else {
+			unlock_page_cgroup(page);
 			goto done;
+		}
 	}
 
 	unlock_page_cgroup(page);
@@ -364,31 +408,26 @@ noreclaim:
 		goto free_pc;
 	}
 
-	lock_page_cgroup(page);
-	/*
-	 * Check if somebody else beat us to allocating the page_cgroup
-	 */
-	race_pc = page_get_page_cgroup(page);
-	if (race_pc) {
-		kfree(pc);
-		pc = race_pc;
-		atomic_inc(&pc->ref_cnt);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-		css_put(&mem->css);
-		goto done;
-	}
-
 	atomic_set(&pc->ref_cnt, 1);
 	pc->mem_cgroup = mem;
 	pc->page = page;
-	page_assign_page_cgroup(page, pc);
+	if (page_cgroup_assign_new_page_cgroup(page, pc)) {
+		/*
+		 * Another charge was already added to this page; we would
+		 * take lock_page_cgroup(page) again, read page->cgroup and
+		 * increment the refcnt... so just retrying is OK.
+		 */
+		res_counter_uncharge(&mem->res, PAGE_SIZE);
+		css_put(&mem->css);
+		kfree(pc);
+		goto retry;
+	}
 
 	spin_lock_irqsave(&mem->lru_lock, flags);
 	list_add(&pc->lru, &mem->active_list);
 	spin_unlock_irqrestore(&mem->lru_lock, flags);
 
 done:
-	unlock_page_cgroup(page);
 	return 0;
 free_pc:
 	kfree(pc);
@@ -432,17 +471,25 @@ void mem_cgroup_uncharge(struct page_cgroup *pc)
 
 	if (atomic_dec_and_test(&pc->ref_cnt)) {
 		page = pc->page;
-		lock_page_cgroup(page);
-		mem = pc->mem_cgroup;
-		css_put(&mem->css);
-		page_assign_page_cgroup(page, NULL);
-		unlock_page_cgroup(page);
-		res_counter_uncharge(&mem->res, PAGE_SIZE);
-
+		/*
+		 * Get page->cgroup and clear it under the lock.
+		 */
+		if (clear_page_cgroup(page, pc) == pc) {
+			mem = pc->mem_cgroup;
+			css_put(&mem->css);
+			res_counter_uncharge(&mem->res, PAGE_SIZE);
 			spin_lock_irqsave(&mem->lru_lock, flags);
 			list_del_init(&pc->lru);
 			spin_unlock_irqrestore(&mem->lru_lock, flags);
 			kfree(pc);
+		} else {
+			/*
+			 * Note: this will be removed when the force-empty
+			 * patch is applied; just show a warning here.
+			 */
+			printk(KERN_ERR "Race in mem_cgroup_uncharge() ?");
+			dump_stack();
+		}
 	}
 }
 
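The contract of clear_page_cgroup() — compare-and-clear under a lock,
returning the old value so the caller can tell whether it performed the
clear — can be modeled outside the kernel. Below is a minimal userspace
sketch using a pthread mutex in place of the page_cgroup bit lock; every
name in it is invented for illustration and none of it is kernel API:

	#include <pthread.h>
	#include <stdio.h>

	struct page_model {
		pthread_mutex_t lock;	/* stands in for the PAGE_CGROUP_LOCK_BIT lock */
		void *cgroup;		/* stands in for page->page_cgroup */
	};

	/*
	 * Clear p->cgroup only if it still equals "expected"; return the
	 * value seen under the lock, so clear_cgroup(p, pc) == pc means
	 * this caller performed the clear.
	 */
	static void *clear_cgroup(struct page_model *p, void *expected)
	{
		void *old;

		pthread_mutex_lock(&p->lock);
		old = p->cgroup;
		if (old == expected)
			p->cgroup = NULL;
		pthread_mutex_unlock(&p->lock);
		return old;
	}

	int main(void)
	{
		int pc;	/* dummy object standing in for a page_cgroup */
		struct page_model page = { PTHREAD_MUTEX_INITIALIZER, &pc };

		/* First caller passes the current value and wins the race. */
		printf("first clear:  %s\n",
		       clear_cgroup(&page, &pc) == &pc ? "done" : "raced");
		/* A second caller holds a stale value; the clear is a no-op. */
		printf("second clear: %s\n",
		       clear_cgroup(&page, &pc) == &pc ? "done" : "raced");
		return 0;
	}

This prints "done" then "raced": the same two outcomes that
mem_cgroup_uncharge() distinguishes with "clear_page_cgroup(page, pc) == pc".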