memcg: simplify LRU handling by new rule

Currently, LRU handling in the memory cgroup has to do complicated work to see a valid pc->mem_cgroup, which may be overwritten. This patch relaxes that protocol. It guarantees that when pc->mem_cgroup is overwritten, the page must not be on the LRU. With this rule, the LRU routines can trust pc->mem_cgroup and don't need to check bits in pc->flags. The new rule may add a small overhead to swapin, but in most cases LRU handling gets faster. After this patch, the PCG_ACCT_LRU bit is obsolete and is removed. [akpm@linux-foundation.org: remove unneeded VM_BUG_ON(), restore hannes's christmas tree] [akpm@linux-foundation.org: clean up code comment] [hughd@google.com: fix NULL mem_cgroup_try_charge] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Miklos Szeredi <mszeredi@suse.cz> Acked-by: Michal Hocko <mhocko@suse.cz> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Ying Han <yinghan@google.com> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
author: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> 2012-01-12 20:19:01 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-01-12 23:13:07 -0500
commit: 38c5d72f3ebe5ddd57d2f08dc035070fc6c9a287 (patch)
tree: 6461c0dfec95dccc92191f059fbe99b5660a8860
parent: 4e5f01c2b9b94321992acb09c35d34f5ee5bb274 (diff)
2 files changed, 54 insertions, 77 deletions
diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
index 1153095ee457..a2d11771c84b 100644
--- a/include/linux/page_cgroup.h
+++ b/include/linux/page_cgroup.h
@@ -10,8 +10,6 @@ enum {
        /* flags for mem_cgroup and file and I/O status */
        PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
-        /* No lock in page_cgroup */
-        PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
        __NR_PCG_FLAGS,
 };
@@ -75,12 +73,6 @@ TESTPCGFLAG(Used, USED)
 CLEARPCGFLAG(Used, USED)
 SETPCGFLAG(Used, USED)
-SETPCGFLAG(AcctLRU, ACCT_LRU)
-CLEARPCGFLAG(AcctLRU, ACCT_LRU)
-TESTPCGFLAG(AcctLRU, ACCT_LRU)
-TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
 SETPCGFLAG(FileMapped, FILE_MAPPED)
 CLEARPCGFLAG(FileMapped, FILE_MAPPED)
 TESTPCGFLAG(FileMapped, FILE_MAPPED)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c74102d6eb5a..ff051ee8fb4b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1040,30 +1040,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
                return &zone->lruvec;
        pc = lookup_page_cgroup(page);
-        VM_BUG_ON(PageCgroupAcctLRU(pc));
+        memcg = pc->mem_cgroup;
-        /*
-         * putback:                             charge:
-         * SetPageLRU                           SetPageCgroupUsed
-         * smp_mb                               smp_mb
-         * PageCgroupUsed && add to memcg LRU   PageLRU && add to memcg LRU
-         *
-         * Ensure that one of the two sides adds the page to the memcg
-         * LRU during a race.
-         */
-        smp_mb();
-        /*
-         * If the page is uncharged, it may be freed soon, but it
-         * could also be swap cache (readahead, swapoff) that needs to
-         * be reclaimable in the future.  root_mem_cgroup will babysit
-         * it for the time being.
-         */
-        if (PageCgroupUsed(pc)) {
-                /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
-                smp_rmb();
-                memcg = pc->mem_cgroup;
-                SetPageCgroupAcctLRU(pc);
-        } else
-                memcg = root_mem_cgroup;
        mz = page_cgroup_zoneinfo(memcg, page);
        /* compound_order() is stabilized through lru_lock */
        MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -1090,18 +1067,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
                return;
        pc = lookup_page_cgroup(page);
-        /*
+        memcg = pc->mem_cgroup;
-         * root_mem_cgroup babysits uncharged LRU pages, but
+        VM_BUG_ON(!memcg);
-         * PageCgroupUsed is cleared when the page is about to get
-         * freed.  PageCgroupAcctLRU remembers whether the
-         * LRU-accounting happened against pc->mem_cgroup or
-         * root_mem_cgroup.
-         */
-        if (TestClearPageCgroupAcctLRU(pc)) {
-                VM_BUG_ON(!pc->mem_cgroup);
-                memcg = pc->mem_cgroup;
-        } else
-                memcg = root_mem_cgroup;
        mz = page_cgroup_zoneinfo(memcg, page);
        /* huge page split is done under lru_lock. so, we have no races. */
        MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
@@ -2217,8 +2184,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 }
 /*
- * Unlike exported interface, "oom" parameter is added. if oom==true,
+ * __mem_cgroup_try_charge() does
- * oom-killer can be invoked.
+ * 1. detect memcg to be charged against from passed *mm and *ptr,
+ * 2. update res_counter
+ * 3. call memory reclaim if necessary.
+ *
+ * In some special case, if the task is fatal, fatal_signal_pending() or
+ * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
+ * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
+ * as possible without any hazards. 2: all pages should have a valid
+ * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
+ * pointer, that is treated as a charge to root_mem_cgroup.
+ *
+ * So __mem_cgroup_try_charge() will return
+ *  0       ...  on success, filling *ptr with a valid memcg pointer.
+ *  -ENOMEM ...  charge failure because of resource limits.
+ *  -EINTR  ...  if thread is fatal. *ptr is filled with root_mem_cgroup.
+ *
+ * Unlike the exported interface, an "oom" parameter is added. if oom==true,
+ * the oom-killer can be invoked.
 */
 static int __mem_cgroup_try_charge(struct mm_struct *mm,
                                   gfp_t gfp_mask,
@@ -2247,7 +2231,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
         * set, if so charge the init_mm (happens for pagecache usage).
         */
        if (!*ptr && !mm)
-                goto bypass;
+                *ptr = root_mem_cgroup;
 again:
        if (*ptr) { /* css should be a valid one */
                memcg = *ptr;
@@ -2273,7 +2257,9 @@ again:
                 * task-struct. So, mm->owner can be NULL.
                 */
                memcg = mem_cgroup_from_task(p);
-                if (!memcg || mem_cgroup_is_root(memcg)) {
+                if (!memcg)
+                        memcg = root_mem_cgroup;
+                if (mem_cgroup_is_root(memcg)) {
                        rcu_read_unlock();
                        goto done;
                }
@@ -2348,8 +2334,8 @@ nomem:
        *ptr = NULL;
        return -ENOMEM;
 bypass:
-        *ptr = NULL;
+        *ptr = root_mem_cgroup;
-        return 0;
+        return -EINTR;
 }
 /*
@@ -2457,6 +2443,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
        unlock_page_cgroup(pc);
+        WARN_ON_ONCE(PageLRU(page));
        /*
         * "charge_statistics" updated event counter. Then, check it.
         * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2468,7 +2455,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
-                        (1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))
+                        (1 << PCG_MIGRATION))
 /*
 * Because tail pages are not marked as "used", set it. We're under
 * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2478,7 +2465,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
        struct page_cgroup *head_pc = lookup_page_cgroup(head);
+        struct mem_cgroup_per_zone *mz;
        struct page_cgroup *pc;
+        enum lru_list lru;
        int i;
        if (mem_cgroup_disabled())
@@ -2487,23 +2476,15 @@ void mem_cgroup_split_huge_fixup(struct page *head)
                pc = head_pc + i;
                pc->mem_cgroup = head_pc->mem_cgroup;
                smp_wmb();/* see __commit_charge() */
-                /*
-                 * LRU flags cannot be copied because we need to add tail
-                 * page to LRU by generic call and our hooks will be called.
-                 */
                pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
        }
+        /*
-        if (PageCgroupAcctLRU(head_pc)) {
+         * Tail pages will be added to LRU.
-                enum lru_list lru;
+         * We hold lru_lock,then,reduce counter directly.
-                struct mem_cgroup_per_zone *mz;
+         */
-                /*
+        lru = page_lru(head);
-                 * We hold lru_lock, then, reduce counter directly.
+        mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-                 */
+        MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
-                lru = page_lru(head);
-                mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-                MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
-        }
 }
 #endif
@@ -2620,7 +2601,7 @@ static int mem_cgroup_move_parent(struct page *page,
        parent = mem_cgroup_from_cont(pcg);
        ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
-        if (ret || !parent)
+        if (ret)
                goto put_back;
        if (nr_pages > 1)
@@ -2667,9 +2648,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
        pc = lookup_page_cgroup(page);
        ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-        if (ret || !memcg)
+        if (ret == -ENOMEM)
                return ret;
        __mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
        return 0;
 }
@@ -2736,10 +2716,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
        if (!page_is_file_cache(page))
                type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
-        if (!PageSwapCache(page)) {
+        if (!PageSwapCache(page))
                ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
-                WARN_ON_ONCE(PageLRU(page));
+        else { /* page is swapcache/shmem */
-        } else { /* page is swapcache/shmem */
                ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
                if (!ret)
                        __mem_cgroup_commit_charge_swapin(page, memcg, type);
@@ -2781,11 +2760,16 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
        *memcgp = memcg;
        ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
        css_put(&memcg->css);
+        if (ret == -EINTR)
+                ret = 0;
        return ret;
 charge_cur_mm:
        if (unlikely(!mm))
                mm = &init_mm;
-        return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+        ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
+        if (ret == -EINTR)
+                ret = 0;
+        return ret;
 }
 static void
@@ -3245,7 +3229,7 @@ int mem_cgroup_prepare_migration(struct page *page,
        *memcgp = memcg;
        ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
        css_put(&memcg->css);/* drop extra refcnt */
-        if (ret || *memcgp == NULL) {
+        if (ret) {
                if (PageAnon(page)) {
                        lock_page_cgroup(pc);
                        ClearPageCgroupMigration(pc);
@@ -3255,6 +3239,7 @@ int mem_cgroup_prepare_migration(struct page *page,
                         */
                        mem_cgroup_uncharge_page(page);
                }
+                /* we'll need to revisit this error code (we have -EINTR) */
                return -ENOMEM;
        }
        /*
@@ -3674,7 +3659,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
                pc = lookup_page_cgroup(page);
                ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
-                if (ret == -ENOMEM)
+                if (ret == -ENOMEM || ret == -EINTR)
                        break;
                if (ret == -EBUSY || ret == -EINVAL) {
@@ -5065,9 +5050,9 @@ one_by_one:
                }
                ret = __mem_cgroup_try_charge(NULL,
                                        GFP_KERNEL, 1, &memcg, false);
-                if (ret || !memcg)
+                if (ret)
                        /* mem_cgroup_clear_mc() will do uncharge later */
-                        return -ENOMEM;
+                        return ret;
                mc.precharge++;
        }
        return ret;
author	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>	2012-01-12 20:19:01 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-01-12 23:13:07 -0500
commit	38c5d72f3ebe5ddd57d2f08dc035070fc6c9a287 (patch)
tree	6461c0dfec95dccc92191f059fbe99b5660a8860
parent	4e5f01c2b9b94321992acb09c35d34f5ee5bb274 (diff)

diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index 1153095ee457..a2d11771c84b 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h
@@ -10,8 +10,6 @@ enum {
10	/* flags for mem_cgroup and file and I/O status */	10	/* flags for mem_cgroup and file and I/O status */
11	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */	11	PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
12	PCG_FILE_MAPPED, /* page is accounted as "mapped" */	12	PCG_FILE_MAPPED, /* page is accounted as "mapped" */
13	/* No lock in page_cgroup */
14	PCG_ACCT_LRU, /* page has been accounted for (under lru_lock) */
15	__NR_PCG_FLAGS,	13	__NR_PCG_FLAGS,
16	};	14	};
17		15
@@ -75,12 +73,6 @@ TESTPCGFLAG(Used, USED)
75	CLEARPCGFLAG(Used, USED)	73	CLEARPCGFLAG(Used, USED)
76	SETPCGFLAG(Used, USED)	74	SETPCGFLAG(Used, USED)
77		75
78	SETPCGFLAG(AcctLRU, ACCT_LRU)
79	CLEARPCGFLAG(AcctLRU, ACCT_LRU)
80	TESTPCGFLAG(AcctLRU, ACCT_LRU)
81	TESTCLEARPCGFLAG(AcctLRU, ACCT_LRU)
82
83
84	SETPCGFLAG(FileMapped, FILE_MAPPED)	76	SETPCGFLAG(FileMapped, FILE_MAPPED)
85	CLEARPCGFLAG(FileMapped, FILE_MAPPED)	77	CLEARPCGFLAG(FileMapped, FILE_MAPPED)
86	TESTPCGFLAG(FileMapped, FILE_MAPPED)	78	TESTPCGFLAG(FileMapped, FILE_MAPPED)


diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c74102d6eb5a..ff051ee8fb4b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c
@@ -1040,30 +1040,7 @@ struct lruvec *mem_cgroup_lru_add_list(struct zone *zone, struct page *page,
1040	return &zone->lruvec;	1040	return &zone->lruvec;
1041		1041
1042	pc = lookup_page_cgroup(page);	1042	pc = lookup_page_cgroup(page);
1043	VM_BUG_ON(PageCgroupAcctLRU(pc));	1043	memcg = pc->mem_cgroup;
1044	/*
1045	* putback: charge:
1046	* SetPageLRU SetPageCgroupUsed
1047	* smp_mb smp_mb
1048	* PageCgroupUsed && add to memcg LRU PageLRU && add to memcg LRU
1049	*
1050	* Ensure that one of the two sides adds the page to the memcg
1051	* LRU during a race.
1052	*/
1053	smp_mb();
1054	/*
1055	* If the page is uncharged, it may be freed soon, but it
1056	* could also be swap cache (readahead, swapoff) that needs to
1057	* be reclaimable in the future. root_mem_cgroup will babysit
1058	* it for the time being.
1059	*/
1060	if (PageCgroupUsed(pc)) {
1061	/* Ensure pc->mem_cgroup is visible after reading PCG_USED. */
1062	smp_rmb();
1063	memcg = pc->mem_cgroup;
1064	SetPageCgroupAcctLRU(pc);
1065	} else
1066	memcg = root_mem_cgroup;
1067	mz = page_cgroup_zoneinfo(memcg, page);	1044	mz = page_cgroup_zoneinfo(memcg, page);
1068	/* compound_order() is stabilized through lru_lock */	1045	/* compound_order() is stabilized through lru_lock */
1069	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);	1046	MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page);
@@ -1090,18 +1067,8 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
1090	return;	1067	return;
1091		1068
1092	pc = lookup_page_cgroup(page);	1069	pc = lookup_page_cgroup(page);
1093	/*	1070	memcg = pc->mem_cgroup;
1094	* root_mem_cgroup babysits uncharged LRU pages, but	1071	VM_BUG_ON(!memcg);
1095	* PageCgroupUsed is cleared when the page is about to get
1096	* freed. PageCgroupAcctLRU remembers whether the
1097	* LRU-accounting happened against pc->mem_cgroup or
1098	* root_mem_cgroup.
1099	*/
1100	if (TestClearPageCgroupAcctLRU(pc)) {
1101	VM_BUG_ON(!pc->mem_cgroup);
1102	memcg = pc->mem_cgroup;
1103	} else
1104	memcg = root_mem_cgroup;
1105	mz = page_cgroup_zoneinfo(memcg, page);	1072	mz = page_cgroup_zoneinfo(memcg, page);
1106	/* huge page split is done under lru_lock. so, we have no races. */	1073	/* huge page split is done under lru_lock. so, we have no races. */
1107	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);	1074	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
@@ -2217,8 +2184,25 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2217	}	2184	}
2218		2185
2219	/*	2186	/*
2220	* Unlike exported interface, "oom" parameter is added. if oom==true,	2187	* __mem_cgroup_try_charge() does
2221	* oom-killer can be invoked.	2188	* 1. detect memcg to be charged against from passed *mm and *ptr,
		2189	* 2. update res_counter
		2190	* 3. call memory reclaim if necessary.
		2191	*
		2192	* In some special case, if the task is fatal, fatal_signal_pending() or
		2193	* has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
		2194	* to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
		2195	* as possible without any hazards. 2: all pages should have a valid
		2196	* pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
		2197	* pointer, that is treated as a charge to root_mem_cgroup.
		2198	*
		2199	* So __mem_cgroup_try_charge() will return
		2200	* 0 ... on success, filling *ptr with a valid memcg pointer.
		2201	* -ENOMEM ... charge failure because of resource limits.
		2202	* -EINTR ... if thread is fatal. *ptr is filled with root_mem_cgroup.
		2203	*
		2204	* Unlike the exported interface, an "oom" parameter is added. if oom==true,
		2205	* the oom-killer can be invoked.
2222	*/	2206	*/
2223	static int __mem_cgroup_try_charge(struct mm_struct *mm,	2207	static int __mem_cgroup_try_charge(struct mm_struct *mm,
2224	gfp_t gfp_mask,	2208	gfp_t gfp_mask,
@@ -2247,7 +2231,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
2247	* set, if so charge the init_mm (happens for pagecache usage).	2231	* set, if so charge the init_mm (happens for pagecache usage).
2248	*/	2232	*/
2249	if (!*ptr && !mm)	2233	if (!*ptr && !mm)
2250	goto bypass;	2234	*ptr = root_mem_cgroup;
2251	again:	2235	again:
2252	if (*ptr) { /* css should be a valid one */	2236	if (*ptr) { /* css should be a valid one */
2253	memcg = *ptr;	2237	memcg = *ptr;
@@ -2273,7 +2257,9 @@ again:
2273	* task-struct. So, mm->owner can be NULL.	2257	* task-struct. So, mm->owner can be NULL.
2274	*/	2258	*/
2275	memcg = mem_cgroup_from_task(p);	2259	memcg = mem_cgroup_from_task(p);
2276	if (!memcg || mem_cgroup_is_root(memcg)) {	2260	if (!memcg)
		2261	memcg = root_mem_cgroup;
		2262	if (mem_cgroup_is_root(memcg)) {
2277	rcu_read_unlock();	2263	rcu_read_unlock();
2278	goto done;	2264	goto done;
2279	}	2265	}
@@ -2348,8 +2334,8 @@ nomem:
2348	*ptr = NULL;	2334	*ptr = NULL;
2349	return -ENOMEM;	2335	return -ENOMEM;
2350	bypass:	2336	bypass:
2351	*ptr = NULL;	2337	*ptr = root_mem_cgroup;
2352	return 0;	2338	return -EINTR;
2353	}	2339	}
2354		2340
2355	/*	2341	/*
@@ -2457,6 +2443,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2457		2443
2458	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);	2444	mem_cgroup_charge_statistics(memcg, PageCgroupCache(pc), nr_pages);
2459	unlock_page_cgroup(pc);	2445	unlock_page_cgroup(pc);
		2446	WARN_ON_ONCE(PageLRU(page));
2460	/*	2447	/*
2461	* "charge_statistics" updated event counter. Then, check it.	2448	* "charge_statistics" updated event counter. Then, check it.
2462	* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.	2449	* Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
@@ -2468,7 +2455,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2468	#ifdef CONFIG_TRANSPARENT_HUGEPAGE	2455	#ifdef CONFIG_TRANSPARENT_HUGEPAGE
2469		2456
2470	#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\	2457	#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
2471	(1 << PCG_ACCT_LRU) | (1 << PCG_MIGRATION))	2458	(1 << PCG_MIGRATION))
2472	/*	2459	/*
2473	* Because tail pages are not marked as "used", set it. We're under	2460	* Because tail pages are not marked as "used", set it. We're under
2474	* zone->lru_lock, 'splitting on pmd' and compound_lock.	2461	* zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -2478,7 +2465,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2478	void mem_cgroup_split_huge_fixup(struct page *head)	2465	void mem_cgroup_split_huge_fixup(struct page *head)
2479	{	2466	{
2480	struct page_cgroup *head_pc = lookup_page_cgroup(head);	2467	struct page_cgroup *head_pc = lookup_page_cgroup(head);
		2468	struct mem_cgroup_per_zone *mz;
2481	struct page_cgroup *pc;	2469	struct page_cgroup *pc;
		2470	enum lru_list lru;
2482	int i;	2471	int i;
2483		2472
2484	if (mem_cgroup_disabled())	2473	if (mem_cgroup_disabled())
@@ -2487,23 +2476,15 @@ void mem_cgroup_split_huge_fixup(struct page *head)
2487	pc = head_pc + i;	2476	pc = head_pc + i;
2488	pc->mem_cgroup = head_pc->mem_cgroup;	2477	pc->mem_cgroup = head_pc->mem_cgroup;
2489	smp_wmb();/* see __commit_charge() */	2478	smp_wmb();/* see __commit_charge() */
2490	/*
2491	* LRU flags cannot be copied because we need to add tail
2492	* page to LRU by generic call and our hooks will be called.
2493	*/
2494	pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;	2479	pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
2495	}	2480	}
2496		2481	/*
2497	if (PageCgroupAcctLRU(head_pc)) {	2482	* Tail pages will be added to LRU.
2498	enum lru_list lru;	2483	* We hold lru_lock,then,reduce counter directly.
2499	struct mem_cgroup_per_zone *mz;	2484	*/
2500	/*	2485	lru = page_lru(head);
2501	* We hold lru_lock, then, reduce counter directly.	2486	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
2502	*/	2487	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
2503	lru = page_lru(head);
2504	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
2505	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
2506	}
2507	}	2488	}
2508	#endif	2489	#endif
2509		2490
@@ -2620,7 +2601,7 @@ static int mem_cgroup_move_parent(struct page *page,
2620		2601
2621	parent = mem_cgroup_from_cont(pcg);	2602	parent = mem_cgroup_from_cont(pcg);
2622	ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);	2603	ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false);
2623	if (ret || !parent)	2604	if (ret)
2624	goto put_back;	2605	goto put_back;
2625		2606
2626	if (nr_pages > 1)	2607	if (nr_pages > 1)
@@ -2667,9 +2648,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2667		2648
2668	pc = lookup_page_cgroup(page);	2649	pc = lookup_page_cgroup(page);
2669	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);	2650	ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
2670	if (ret || !memcg)	2651	if (ret == -ENOMEM)
2671	return ret;	2652	return ret;
2672
2673	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);	2653	__mem_cgroup_commit_charge(memcg, page, nr_pages, pc, ctype);
2674	return 0;	2654	return 0;
2675	}	2655	}
@@ -2736,10 +2716,9 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
2736	if (!page_is_file_cache(page))	2716	if (!page_is_file_cache(page))
2737	type = MEM_CGROUP_CHARGE_TYPE_SHMEM;	2717	type = MEM_CGROUP_CHARGE_TYPE_SHMEM;
2738		2718
2739	if (!PageSwapCache(page)) {	2719	if (!PageSwapCache(page))
2740	ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);	2720	ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
2741	WARN_ON_ONCE(PageLRU(page));	2721	else { /* page is swapcache/shmem */
2742	} else { /* page is swapcache/shmem */
2743	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);	2722	ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &memcg);
2744	if (!ret)	2723	if (!ret)
2745	__mem_cgroup_commit_charge_swapin(page, memcg, type);	2724	__mem_cgroup_commit_charge_swapin(page, memcg, type);
@@ -2781,11 +2760,16 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2781	*memcgp = memcg;	2760	*memcgp = memcg;
2782	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);	2761	ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
2783	css_put(&memcg->css);	2762	css_put(&memcg->css);
		2763	if (ret == -EINTR)
		2764	ret = 0;
2784	return ret;	2765	return ret;
2785	charge_cur_mm:	2766	charge_cur_mm:
2786	if (unlikely(!mm))	2767	if (unlikely(!mm))
2787	mm = &init_mm;	2768	mm = &init_mm;
2788	return __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);	2769	ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
		2770	if (ret == -EINTR)
		2771	ret = 0;
		2772	return ret;
2789	}	2773	}
2790		2774
2791	static void	2775	static void
@@ -3245,7 +3229,7 @@ int mem_cgroup_prepare_migration(struct page *page,
3245	*memcgp = memcg;	3229	*memcgp = memcg;
3246	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);	3230	ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, memcgp, false);
3247	css_put(&memcg->css);/* drop extra refcnt */	3231	css_put(&memcg->css);/* drop extra refcnt */
3248	if (ret || *memcgp == NULL) {	3232	if (ret) {
3249	if (PageAnon(page)) {	3233	if (PageAnon(page)) {
3250	lock_page_cgroup(pc);	3234	lock_page_cgroup(pc);
3251	ClearPageCgroupMigration(pc);	3235	ClearPageCgroupMigration(pc);
@@ -3255,6 +3239,7 @@ int mem_cgroup_prepare_migration(struct page *page,
3255	*/	3239	*/
3256	mem_cgroup_uncharge_page(page);	3240	mem_cgroup_uncharge_page(page);
3257	}	3241	}
		3242	/* we'll need to revisit this error code (we have -EINTR) */
3258	return -ENOMEM;	3243	return -ENOMEM;
3259	}	3244	}
3260	/*	3245	/*
@@ -3674,7 +3659,7 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3674	pc = lookup_page_cgroup(page);	3659	pc = lookup_page_cgroup(page);
3675		3660
3676	ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);	3661	ret = mem_cgroup_move_parent(page, pc, memcg, GFP_KERNEL);
3677	if (ret == -ENOMEM)	3662	if (ret == -ENOMEM || ret == -EINTR)
3678	break;	3663	break;
3679		3664
3680	if (ret == -EBUSY || ret == -EINVAL) {	3665	if (ret == -EBUSY || ret == -EINVAL) {
@@ -5065,9 +5050,9 @@ one_by_one:
5065	}	5050	}
5066	ret = __mem_cgroup_try_charge(NULL,	5051	ret = __mem_cgroup_try_charge(NULL,
5067	GFP_KERNEL, 1, &memcg, false);	5052	GFP_KERNEL, 1, &memcg, false);
5068	if (ret || !memcg)	5053	if (ret)
5069	/* mem_cgroup_clear_mc() will do uncharge later */	5054	/* mem_cgroup_clear_mc() will do uncharge later */
5070	return -ENOMEM;	5055	return ret;
5071	mc.precharge++;	5056	mc.precharge++;
5072	}	5057	}
5073	return ret;	5058	return ret;