memcg: remove clear_page_cgroup and atomics

Remove clear_page_cgroup: it's an unhelpful helper, see for example how mem_cgroup_uncharge_page had to unlock_page_cgroup just in order to call it (serious races from that? I'm not sure). Once that's gone, you can see it's pointless for page_cgroup's ref_cnt to be atomic: it's always manipulated under lock_page_cgroup, except where force_empty unilaterally reset it to 0 (and how does uncharge's atomic_dec_and_test protect against that?). Simplify this page_cgroup locking: if you've got the lock and the pc is attached, then the ref_cnt must be positive: VM_BUG_ONs to check that, and to check that pc->page matches page (we're on the way to finding why sometimes it doesn't, but this patch doesn't fix that). Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: David Rientjes <rientjes@google.com> Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Paul Menage <menage@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: Hugh Dickins <hugh@veritas.com> 2008-03-04 17:29:11 -0500
committer: Linus Torvalds <torvalds@woody.linux-foundation.org> 2008-03-04 19:35:15 -0500
commit: b9c565d5a29a795f970b4a1340393d8fc6722fb9 (patch)
tree: cbd1e0c762738dee438dfd1093a1bb21ce56664b /mm
parent: d5b69e38f8cdb1e41cc022305c86c9739bf1ffdb (diff)
1 files changed, 43 insertions, 63 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a59f946c9338..13e9e7d8e49e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -161,8 +161,7 @@ struct page_cgroup {
        struct list_head lru;           /* per cgroup LRU list */
        struct page *page;
        struct mem_cgroup *mem_cgroup;
-        atomic_t ref_cnt;               /* Helpful when pages move b/w  */
+        int ref_cnt;                    /* cached, mapped, migrating */
-                                        /* mapped and cached states     */
        int flags;
 };
 #define PAGE_CGROUP_FLAG_CACHE  (0x1)   /* charged as cache */
@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page)
        bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
 }
-/*
- * Clear page->page_cgroup member under lock_page_cgroup().
- * If given "pc" value is different from one page->page_cgroup,
- * page->cgroup is not cleared.
- * Returns a value of page->page_cgroup at lock taken.
- * A can can detect failure of clearing by following
- *  clear_page_cgroup(page, pc) == pc
- */
-static struct page_cgroup *clear_page_cgroup(struct page *page,
-                                                struct page_cgroup *pc)
-{
-        struct page_cgroup *ret;
-        /* lock and clear */
-        lock_page_cgroup(page);
-        ret = page_get_page_cgroup(page);
-        if (likely(ret == pc))
-                page_assign_page_cgroup(page, NULL);
-        unlock_page_cgroup(page);
-        return ret;
-}
 static void __mem_cgroup_remove_list(struct page_cgroup *pc)
 {
        int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
@@ -555,15 +533,12 @@ retry:
         * the page has already been accounted.
         */
        if (pc) {
-                if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
+                VM_BUG_ON(pc->page != page);
-                        /* this page is under being uncharged ? */
+                VM_BUG_ON(pc->ref_cnt <= 0);
-                        unlock_page_cgroup(page);
-                        cpu_relax();
+                pc->ref_cnt++;
-                        goto retry;
+                unlock_page_cgroup(page);
-                } else {
+                goto done;
-                        unlock_page_cgroup(page);
-                        goto done;
-                }
        }
        unlock_page_cgroup(page);
@@ -612,7 +587,7 @@ retry:
                congestion_wait(WRITE, HZ/10);
        }
-        atomic_set(&pc->ref_cnt, 1);
+        pc->ref_cnt = 1;
        pc->mem_cgroup = mem;
        pc->page = page;
        pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page)
        if (!pc)
                goto unlock;
-        if (atomic_dec_and_test(&pc->ref_cnt)) {
+        VM_BUG_ON(pc->page != page);
-                page = pc->page;
+        VM_BUG_ON(pc->ref_cnt <= 0);
-                mz = page_cgroup_zoneinfo(pc);
-                /*
+        if (--(pc->ref_cnt) == 0) {
-                 * get page->cgroup and clear it under lock.
+                page_assign_page_cgroup(page, NULL);
-                 * force_empty can drop page->cgroup without checking refcnt.
-                 */
                unlock_page_cgroup(page);
-                if (clear_page_cgroup(page, pc) == pc) {
-                        mem = pc->mem_cgroup;
+                mem = pc->mem_cgroup;
-                        css_put(&mem->css);
+                css_put(&mem->css);
-                        res_counter_uncharge(&mem->res, PAGE_SIZE);
+                res_counter_uncharge(&mem->res, PAGE_SIZE);
-                        spin_lock_irqsave(&mz->lru_lock, flags);
-                        __mem_cgroup_remove_list(pc);
+                mz = page_cgroup_zoneinfo(pc);
-                        spin_unlock_irqrestore(&mz->lru_lock, flags);
+                spin_lock_irqsave(&mz->lru_lock, flags);
-                        kfree(pc);
+                __mem_cgroup_remove_list(pc);
-                }
+                spin_unlock_irqrestore(&mz->lru_lock, flags);
-                lock_page_cgroup(page);
+                kfree(pc);
+                return;
        }
 unlock:
@@ -714,14 +689,13 @@ unlock:
 int mem_cgroup_prepare_migration(struct page *page)
 {
        struct page_cgroup *pc;
-        int ret = 0;
        lock_page_cgroup(page);
        pc = page_get_page_cgroup(page);
-        if (pc && atomic_inc_not_zero(&pc->ref_cnt))
+        if (pc)
-                ret = 1;
+                pc->ref_cnt++;
        unlock_page_cgroup(page);
-        return ret;
+        return pc != NULL;
 }
 void mem_cgroup_end_migration(struct page *page)
@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page *page, struct page *newpage)
        struct mem_cgroup_per_zone *mz;
        unsigned long flags;
-retry:
+        lock_page_cgroup(page);
        pc = page_get_page_cgroup(page);
-        if (!pc)
+        if (!pc) {
+                unlock_page_cgroup(page);
                return;
+        }
-        mz = page_cgroup_zoneinfo(pc);
+        page_assign_page_cgroup(page, NULL);
-        if (clear_page_cgroup(page, pc) != pc)
+        unlock_page_cgroup(page);
-                goto retry;
+        mz = page_cgroup_zoneinfo(pc);
        spin_lock_irqsave(&mz->lru_lock, flags);
        __mem_cgroup_remove_list(pc);
        spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -794,15 +770,19 @@ retry:
        while (--count && !list_empty(list)) {
                pc = list_entry(list->prev, struct page_cgroup, lru);
                page = pc->page;
-                /* Avoid race with charge */
+                lock_page_cgroup(page);
-                atomic_set(&pc->ref_cnt, 0);
+                if (page_get_page_cgroup(page) == pc) {
-                if (clear_page_cgroup(page, pc) == pc) {
+                        page_assign_page_cgroup(page, NULL);
+                        unlock_page_cgroup(page);
                        css_put(&mem->css);
                        res_counter_uncharge(&mem->res, PAGE_SIZE);
                        __mem_cgroup_remove_list(pc);
                        kfree(pc);
-                } else  /* being uncharged ? ...do relax */
+                } else {
+                        /* racing uncharge: let page go then retry */
+                        unlock_page_cgroup(page);
                        break;
+                }
        }
        spin_unlock_irqrestore(&mz->lru_lock, flags);
author	Hugh Dickins <hugh@veritas.com>	2008-03-04 17:29:11 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-03-04 19:35:15 -0500
commit	b9c565d5a29a795f970b4a1340393d8fc6722fb9 (patch)
tree	cbd1e0c762738dee438dfd1093a1bb21ce56664b /mm
parent	d5b69e38f8cdb1e41cc022305c86c9739bf1ffdb (diff)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a59f946c9338..13e9e7d8e49e 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c
@@ -161,8 +161,7 @@ struct page_cgroup {
161	struct list_head lru; /* per cgroup LRU list */	161	struct list_head lru; /* per cgroup LRU list */
162	struct page *page;	162	struct page *page;
163	struct mem_cgroup *mem_cgroup;	163	struct mem_cgroup *mem_cgroup;
164	atomic_t ref_cnt; /* Helpful when pages move b/w */	164	int ref_cnt; /* cached, mapped, migrating */
165	/* mapped and cached states */
166	int flags;	165	int flags;
167	};	166	};
168	#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */	167	#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
@@ -283,27 +282,6 @@ static void unlock_page_cgroup(struct page *page)
283	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);	282	bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
284	}	283	}
285		284
286	/*
287	* Clear page->page_cgroup member under lock_page_cgroup().
288	* If given "pc" value is different from one page->page_cgroup,
289	* page->cgroup is not cleared.
290	* Returns a value of page->page_cgroup at lock taken.
291	* A can can detect failure of clearing by following
292	* clear_page_cgroup(page, pc) == pc
293	*/
294	static struct page_cgroup clear_page_cgroup(struct page page,
295	struct page_cgroup *pc)
296	{
297	struct page_cgroup *ret;
298	/* lock and clear */
299	lock_page_cgroup(page);
300	ret = page_get_page_cgroup(page);
301	if (likely(ret == pc))
302	page_assign_page_cgroup(page, NULL);
303	unlock_page_cgroup(page);
304	return ret;
305	}
306
307	static void __mem_cgroup_remove_list(struct page_cgroup *pc)	285	static void __mem_cgroup_remove_list(struct page_cgroup *pc)
308	{	286	{
309	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;	287	int from = pc->flags & PAGE_CGROUP_FLAG_ACTIVE;
@@ -555,15 +533,12 @@ retry:
555	* the page has already been accounted.	533	* the page has already been accounted.
556	*/	534	*/
557	if (pc) {	535	if (pc) {
558	if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {	536	VM_BUG_ON(pc->page != page);
559	/* this page is under being uncharged ? */	537	VM_BUG_ON(pc->ref_cnt <= 0);
560	unlock_page_cgroup(page);	538
561	cpu_relax();	539	pc->ref_cnt++;
562	goto retry;	540	unlock_page_cgroup(page);
563	} else {	541	goto done;
564	unlock_page_cgroup(page);
565	goto done;
566	}
567	}	542	}
568	unlock_page_cgroup(page);	543	unlock_page_cgroup(page);
569		544
@@ -612,7 +587,7 @@ retry:
612	congestion_wait(WRITE, HZ/10);	587	congestion_wait(WRITE, HZ/10);
613	}	588	}
614		589
615	atomic_set(&pc->ref_cnt, 1);	590	pc->ref_cnt = 1;
616	pc->mem_cgroup = mem;	591	pc->mem_cgroup = mem;
617	pc->page = page;	592	pc->page = page;
618	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;	593	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
@@ -683,24 +658,24 @@ void mem_cgroup_uncharge_page(struct page *page)
683	if (!pc)	658	if (!pc)
684	goto unlock;	659	goto unlock;
685		660
686	if (atomic_dec_and_test(&pc->ref_cnt)) {	661	VM_BUG_ON(pc->page != page);
687	page = pc->page;	662	VM_BUG_ON(pc->ref_cnt <= 0);
688	mz = page_cgroup_zoneinfo(pc);	663
689	/*	664	if (--(pc->ref_cnt) == 0) {
690	* get page->cgroup and clear it under lock.	665	page_assign_page_cgroup(page, NULL);
691	* force_empty can drop page->cgroup without checking refcnt.
692	*/
693	unlock_page_cgroup(page);	666	unlock_page_cgroup(page);
694	if (clear_page_cgroup(page, pc) == pc) {	667
695	mem = pc->mem_cgroup;	668	mem = pc->mem_cgroup;
696	css_put(&mem->css);	669	css_put(&mem->css);
697	res_counter_uncharge(&mem->res, PAGE_SIZE);	670	res_counter_uncharge(&mem->res, PAGE_SIZE);
698	spin_lock_irqsave(&mz->lru_lock, flags);	671
699	__mem_cgroup_remove_list(pc);	672	mz = page_cgroup_zoneinfo(pc);
700	spin_unlock_irqrestore(&mz->lru_lock, flags);	673	spin_lock_irqsave(&mz->lru_lock, flags);
701	kfree(pc);	674	__mem_cgroup_remove_list(pc);
702	}	675	spin_unlock_irqrestore(&mz->lru_lock, flags);
703	lock_page_cgroup(page);	676
		677	kfree(pc);
		678	return;
704	}	679	}
705		680
706	unlock:	681	unlock:
@@ -714,14 +689,13 @@ unlock:
714	int mem_cgroup_prepare_migration(struct page *page)	689	int mem_cgroup_prepare_migration(struct page *page)
715	{	690	{
716	struct page_cgroup *pc;	691	struct page_cgroup *pc;
717	int ret = 0;
718		692
719	lock_page_cgroup(page);	693	lock_page_cgroup(page);
720	pc = page_get_page_cgroup(page);	694	pc = page_get_page_cgroup(page);
721	if (pc && atomic_inc_not_zero(&pc->ref_cnt))	695	if (pc)
722	ret = 1;	696	pc->ref_cnt++;
723	unlock_page_cgroup(page);	697	unlock_page_cgroup(page);
724	return ret;	698	return pc != NULL;
725	}	699	}
726		700
727	void mem_cgroup_end_migration(struct page *page)	701	void mem_cgroup_end_migration(struct page *page)
@@ -740,15 +714,17 @@ void mem_cgroup_page_migration(struct page page, struct page newpage)
740	struct mem_cgroup_per_zone *mz;	714	struct mem_cgroup_per_zone *mz;
741	unsigned long flags;	715	unsigned long flags;
742		716
743	retry:	717	lock_page_cgroup(page);
744	pc = page_get_page_cgroup(page);	718	pc = page_get_page_cgroup(page);
745	if (!pc)	719	if (!pc) {
		720	unlock_page_cgroup(page);
746	return;	721	return;
		722	}
747		723
748	mz = page_cgroup_zoneinfo(pc);	724	page_assign_page_cgroup(page, NULL);
749	if (clear_page_cgroup(page, pc) != pc)	725	unlock_page_cgroup(page);
750	goto retry;
751		726
		727	mz = page_cgroup_zoneinfo(pc);
752	spin_lock_irqsave(&mz->lru_lock, flags);	728	spin_lock_irqsave(&mz->lru_lock, flags);
753	__mem_cgroup_remove_list(pc);	729	__mem_cgroup_remove_list(pc);
754	spin_unlock_irqrestore(&mz->lru_lock, flags);	730	spin_unlock_irqrestore(&mz->lru_lock, flags);
@@ -794,15 +770,19 @@ retry:
794	while (--count && !list_empty(list)) {	770	while (--count && !list_empty(list)) {
795	pc = list_entry(list->prev, struct page_cgroup, lru);	771	pc = list_entry(list->prev, struct page_cgroup, lru);
796	page = pc->page;	772	page = pc->page;
797	/* Avoid race with charge */	773	lock_page_cgroup(page);
798	atomic_set(&pc->ref_cnt, 0);	774	if (page_get_page_cgroup(page) == pc) {
799	if (clear_page_cgroup(page, pc) == pc) {	775	page_assign_page_cgroup(page, NULL);
		776	unlock_page_cgroup(page);
800	css_put(&mem->css);	777	css_put(&mem->css);
801	res_counter_uncharge(&mem->res, PAGE_SIZE);	778	res_counter_uncharge(&mem->res, PAGE_SIZE);
802	__mem_cgroup_remove_list(pc);	779	__mem_cgroup_remove_list(pc);
803	kfree(pc);	780	kfree(pc);
804	} else /* being uncharged ? ...do relax */	781	} else {
		782	/* racing uncharge: let page go then retry */
		783	unlock_page_cgroup(page);
805	break;	784	break;
		785	}
806	}	786	}
807		787
808	spin_unlock_irqrestore(&mz->lru_lock, flags);	788	spin_unlock_irqrestore(&mz->lru_lock, flags);